| Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 1 | // Copyright 2020 The Monogon Project Authors. |
| 2 | // |
| 3 | // SPDX-License-Identifier: Apache-2.0 |
| 4 | // |
| 5 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | // you may not use this file except in compliance with the License. |
| 7 | // You may obtain a copy of the License at |
| 8 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // |
| 11 | // Unless required by applicable law or agreed to in writing, software |
| 12 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | // See the License for the specific language governing permissions and |
| 15 | // limitations under the License. |
| 16 | |
| Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 17 | // Package kvmdevice implements a Kubernetes device plugin for the virtual KVM |
| 18 | // device. Using the device plugin API allows us to take advantage of the |
| 19 | // scheduler to locate pods on machines eligible for KVM and also allows |
| 20 | // granular access control to KVM using quotas instead of needing privileged |
| 21 | // access. |
| 22 | // Since KVM devices are virtual, this plugin emulates a huge number of them so |
| 23 | // that we never run out. |
| Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 24 | package kvmdevice |
| 25 | |
| 26 | import ( |
| 27 | "bytes" |
| 28 | "context" |
| Tim Windelschmidt | d5f851b | 2024-04-23 14:59:37 +0200 | [diff] [blame] | 29 | "errors" |
| Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 30 | "fmt" |
| Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 31 | "net" |
| 32 | "os" |
| 33 | "strconv" |
| 34 | "strings" |
| 35 | |
| 36 | "golang.org/x/sys/unix" |
| 37 | "google.golang.org/grpc" |
| Lorenz Brun | 99d210d | 2021-05-17 15:29:18 +0200 | [diff] [blame] | 38 | corev1 "k8s.io/api/core/v1" |
| Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 39 | deviceplugin "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" |
| Lorenz Brun | 6211e4d | 2023-11-14 19:09:40 +0100 | [diff] [blame] | 40 | pluginregistration "k8s.io/kubelet/pkg/apis/pluginregistration/v1" |
| Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 41 | |
| Serge Bazanski | 3c5d063 | 2024-09-12 10:49:12 +0000 | [diff] [blame] | 42 | "source.monogon.dev/go/logging" |
| Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 43 | "source.monogon.dev/metropolis/node/core/localstorage" |
| Tim Windelschmidt | 9f21f53 | 2024-05-07 15:14:20 +0200 | [diff] [blame] | 44 | "source.monogon.dev/osbase/supervisor" |
| Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 45 | ) |
| 46 | |
| Lorenz Brun | 99d210d | 2021-05-17 15:29:18 +0200 | [diff] [blame] | 47 | // Name is the name of the KVM devices this plugin exposes |
| 48 | var Name corev1.ResourceName = "devices.monogon.dev/kvm" |
| 49 | |
// Plugin implements the device plugin and plugin registration gRPC services
// for the virtual KVM device. Run serves both of them on Unix sockets below
// KubeletDirectory.
type Plugin struct {
	// NOTE(review): embedded by pointer and never initialized in this file, so
	// any promoted method would be invoked through a nil pointer - confirm this
	// is safe with the generated code in use.
	*deviceplugin.UnimplementedDevicePluginServer
	// KubeletDirectory provides the paths of the device plugin socket and of
	// the plugin registration socket.
	KubeletDirectory *localstorage.DataKubernetesKubeletDirectory

	// logger is set by Run from the supervisor context.
	logger logging.Leveled
}
| 56 | |
| 57 | func (k *Plugin) GetInfo(context.Context, *pluginregistration.InfoRequest) (*pluginregistration.PluginInfo, error) { |
| 58 | return &pluginregistration.PluginInfo{ |
| 59 | Type: pluginregistration.DevicePlugin, |
| Lorenz Brun | 99d210d | 2021-05-17 15:29:18 +0200 | [diff] [blame] | 60 | Name: string(Name), |
| Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 61 | Endpoint: k.KubeletDirectory.Plugins.KVM.FullPath(), |
| 62 | SupportedVersions: []string{"v1beta1"}, |
| 63 | }, nil |
| 64 | } |
| 65 | |
| 66 | func (k *Plugin) NotifyRegistrationStatus(ctx context.Context, req *pluginregistration.RegistrationStatus) (*pluginregistration.RegistrationStatusResponse, error) { |
| 67 | if !req.PluginRegistered { |
| 68 | k.logger.Errorf("KVM plugin failed to register: %v", req.Error) |
| 69 | } |
| 70 | return &pluginregistration.RegistrationStatusResponse{}, nil |
| 71 | } |
| 72 | |
| 73 | func (k *Plugin) GetDevicePluginOptions(context.Context, *deviceplugin.Empty) (*deviceplugin.DevicePluginOptions, error) { |
| 74 | return &deviceplugin.DevicePluginOptions{ |
| 75 | GetPreferredAllocationAvailable: false, |
| 76 | PreStartRequired: false, |
| 77 | }, nil |
| 78 | } |
| 79 | |
| 80 | func (k *Plugin) ListAndWatch(req *deviceplugin.Empty, s deviceplugin.DevicePlugin_ListAndWatchServer) error { |
| 81 | var devs []*deviceplugin.Device |
| 82 | |
| 83 | // TODO(T963): Get this value from Kubelet configuration (or something higher-level?) |
| 84 | for i := 0; i < 256; i++ { |
| 85 | devs = append(devs, &deviceplugin.Device{ |
| 86 | ID: fmt.Sprintf("kvm%v", i), |
| 87 | Health: deviceplugin.Healthy, |
| 88 | }) |
| 89 | } |
| 90 | |
| 91 | s.Send(&deviceplugin.ListAndWatchResponse{Devices: devs}) |
| 92 | |
| 93 | <-s.Context().Done() |
| 94 | return nil |
| 95 | } |
| 96 | |
| 97 | func (k *Plugin) Allocate(ctx context.Context, req *deviceplugin.AllocateRequest) (*deviceplugin.AllocateResponse, error) { |
| 98 | var response deviceplugin.AllocateResponse |
| 99 | |
| 100 | for _, req := range req.ContainerRequests { |
| 101 | var devices []*deviceplugin.DeviceSpec |
| 102 | for range req.DevicesIDs { |
| 103 | dev := new(deviceplugin.DeviceSpec) |
| 104 | dev.HostPath = "/dev/kvm" |
| 105 | dev.ContainerPath = "/dev/kvm" |
| 106 | dev.Permissions = "rw" |
| 107 | devices = append(devices, dev) |
| 108 | } |
| 109 | response.ContainerResponses = append(response.ContainerResponses, &deviceplugin.ContainerAllocateResponse{ |
| 110 | Devices: devices}) |
| 111 | } |
| 112 | |
| 113 | return &response, nil |
| 114 | } |
| 115 | |
| Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 116 | // deviceNumberFromString gets a Linux device number from a string containing |
| 117 | // two decimal numbers representing the major and minor device numbers |
| 118 | // separated by a colon. Whitespace is ignored. |
| Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 119 | func deviceNumberFromString(s string) (uint64, error) { |
| 120 | kvmDevParts := strings.Split(s, ":") |
| 121 | if len(kvmDevParts) != 2 { |
| 122 | return 0, fmt.Errorf("device file spec contains an invalid number of colons: `%v`", s) |
| 123 | } |
| 124 | major, err := strconv.ParseUint(strings.TrimSpace(kvmDevParts[0]), 10, 32) |
| 125 | if err != nil { |
| 126 | return 0, fmt.Errorf("failed to convert major number to an integer: %w", err) |
| 127 | } |
| 128 | minor, err := strconv.ParseUint(strings.TrimSpace(kvmDevParts[1]), 10, 32) |
| 129 | if err != nil { |
| 130 | return 0, fmt.Errorf("failed to convert minor number to an integer: %w", err) |
| 131 | } |
| 132 | |
| 133 | return unix.Mkdev(uint32(major), uint32(minor)), nil |
| 134 | } |
| 135 | |
| 136 | func (k *Plugin) Run(ctx context.Context) error { |
| 137 | k.logger = supervisor.Logger(ctx) |
| 138 | |
| Lorenz Brun | 764a2de | 2021-11-22 16:26:36 +0100 | [diff] [blame] | 139 | l1tfStatus, err := os.ReadFile("/sys/devices/system/cpu/vulnerabilities/l1tf") |
| Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 140 | if err != nil && !os.IsNotExist(err) { |
| Tim Windelschmidt | 5f1a7de | 2024-09-19 02:00:14 +0200 | [diff] [blame] | 141 | return fmt.Errorf("failed to query for CPU vulnerabilities: %w", err) |
| Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 142 | } |
| 143 | |
| 144 | if bytes.Contains(l1tfStatus, []byte("vulnerable")) { |
| 145 | k.logger.Warning("CPU is vulnerable to L1TF, not exposing KVM.") |
| 146 | supervisor.Signal(ctx, supervisor.SignalHealthy) |
| 147 | supervisor.Signal(ctx, supervisor.SignalDone) |
| 148 | return nil |
| 149 | } |
| 150 | |
| Lorenz Brun | 764a2de | 2021-11-22 16:26:36 +0100 | [diff] [blame] | 151 | kvmDevRaw, err := os.ReadFile("/sys/devices/virtual/misc/kvm/dev") |
| Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 152 | if err != nil { |
| 153 | k.logger.Warning("KVM is not available. Check firmware settings and CPU.") |
| 154 | supervisor.Signal(ctx, supervisor.SignalHealthy) |
| 155 | supervisor.Signal(ctx, supervisor.SignalDone) |
| Tim Windelschmidt | 6e5b8a5 | 2024-04-17 02:34:07 +0200 | [diff] [blame] | 156 | //nolint:returnerrcheck |
| Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 157 | return nil |
| 158 | } |
| 159 | |
| 160 | kvmDevNode, err := deviceNumberFromString(string(kvmDevRaw)) |
| Tim Windelschmidt | 096654a | 2024-04-18 23:10:19 +0200 | [diff] [blame] | 161 | if err != nil { |
| 162 | return fmt.Errorf("failed to parse KVM device node: %w", err) |
| 163 | } |
| Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 164 | |
| 165 | err = unix.Mknod("/dev/kvm", 0660, int(kvmDevNode)) |
| Lorenz Brun | 2d83a12 | 2024-05-06 14:38:32 +0200 | [diff] [blame] | 166 | if err != nil && !errors.Is(err, unix.EEXIST) { |
| Tim Windelschmidt | 5f1a7de | 2024-09-19 02:00:14 +0200 | [diff] [blame] | 167 | return fmt.Errorf("failed to create KVM device node: %w", err) |
| Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 168 | } |
| 169 | |
| Lorenz Brun | 4599aa2 | 2023-06-28 13:09:32 +0200 | [diff] [blame] | 170 | // Try to remove socket if an unclean shutdown happened |
| 171 | os.Remove(k.KubeletDirectory.Plugins.KVM.FullPath()) |
| 172 | |
| Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 173 | pluginListener, err := net.ListenUnix("unix", &net.UnixAddr{Name: k.KubeletDirectory.Plugins.KVM.FullPath(), Net: "unix"}) |
| 174 | if err != nil { |
| 175 | return fmt.Errorf("failed to listen on device plugin socket: %w", err) |
| 176 | } |
| Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 177 | |
| 178 | pluginServer := grpc.NewServer() |
| 179 | deviceplugin.RegisterDevicePluginServer(pluginServer, k) |
| 180 | if err := supervisor.Run(ctx, "kvm-device", supervisor.GRPCServer(pluginServer, pluginListener, false)); err != nil { |
| 181 | return err |
| 182 | } |
| 183 | |
| Lorenz Brun | 4599aa2 | 2023-06-28 13:09:32 +0200 | [diff] [blame] | 184 | // Try to remove socket if an unclean shutdown happened |
| 185 | os.Remove(k.KubeletDirectory.PluginsRegistry.KVMReg.FullPath()) |
| 186 | |
| Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 187 | registrationListener, err := net.ListenUnix("unix", &net.UnixAddr{Name: k.KubeletDirectory.PluginsRegistry.KVMReg.FullPath(), Net: "unix"}) |
| 188 | if err != nil { |
| 189 | return fmt.Errorf("failed to listen on registration socket: %w", err) |
| 190 | } |
| Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 191 | |
| 192 | registrationServer := grpc.NewServer() |
| 193 | pluginregistration.RegisterRegistrationServer(registrationServer, k) |
| 194 | if err := supervisor.Run(ctx, "registration", supervisor.GRPCServer(registrationServer, registrationListener, true)); err != nil { |
| 195 | return err |
| 196 | } |
| 197 | supervisor.Signal(ctx, supervisor.SignalHealthy) |
| 198 | supervisor.Signal(ctx, supervisor.SignalDone) |
| 199 | return nil |
| 200 | } |