Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 1 | // Copyright 2020 The Monogon Project Authors. |
| 2 | // |
| 3 | // SPDX-License-Identifier: Apache-2.0 |
| 4 | // |
| 5 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | // you may not use this file except in compliance with the License. |
| 7 | // You may obtain a copy of the License at |
| 8 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // |
| 11 | // Unless required by applicable law or agreed to in writing, software |
| 12 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | // See the License for the specific language governing permissions and |
| 15 | // limitations under the License. |
| 16 | |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 17 | // Package kvmdevice implements a Kubernetes device plugin for the virtual KVM |
| 18 | // device. Using the device plugin API allows us to take advantage of the |
| 19 | // scheduler to locate pods on machines eligible for KVM and also allows |
| 20 | // granular access control to KVM using quotas instead of needing privileged |
| 21 | // access. |
| 22 | // Since KVM devices are virtual, this plugin emulates a huge number of them so |
| 23 | // that we never run out. |
Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 24 | package kvmdevice |
| 25 | |
| 26 | import ( |
| 27 | "bytes" |
| 28 | "context" |
| 29 | "fmt" |
Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 30 | "net" |
| 31 | "os" |
| 32 | "strconv" |
| 33 | "strings" |
| 34 | |
| 35 | "golang.org/x/sys/unix" |
| 36 | "google.golang.org/grpc" |
Lorenz Brun | 99d210d | 2021-05-17 15:29:18 +0200 | [diff] [blame] | 37 | corev1 "k8s.io/api/core/v1" |
Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 38 | deviceplugin "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" |
| 39 | "k8s.io/kubelet/pkg/apis/pluginregistration/v1" |
| 40 | |
| 41 | "source.monogon.dev/metropolis/node/core/localstorage" |
| 42 | "source.monogon.dev/metropolis/pkg/logtree" |
| 43 | "source.monogon.dev/metropolis/pkg/supervisor" |
| 44 | ) |
| 45 | |
Lorenz Brun | 99d210d | 2021-05-17 15:29:18 +0200 | [diff] [blame] | 46 | // Name is the name of the KVM devices this plugin exposes |
| 47 | var Name corev1.ResourceName = "devices.monogon.dev/kvm" |
| 48 | |
// Plugin is the KVM device plugin. It is registered both as the device plugin
// gRPC service and as the plugin registration gRPC service (see Run).
type Plugin struct {
	// Embedded generated stub; presumably supplies defaults for device plugin
	// RPCs that Plugin does not define itself - TODO confirm.
	*deviceplugin.UnimplementedDevicePluginServer
	// KubeletDirectory provides the paths of the device plugin socket and the
	// registration socket.
	KubeletDirectory *localstorage.DataKubernetesKubeletDirectory

	// logger is set from the supervisor context when Run starts.
	logger logtree.LeveledLogger
}
| 55 | |
| 56 | func (k *Plugin) GetInfo(context.Context, *pluginregistration.InfoRequest) (*pluginregistration.PluginInfo, error) { |
| 57 | return &pluginregistration.PluginInfo{ |
| 58 | Type: pluginregistration.DevicePlugin, |
Lorenz Brun | 99d210d | 2021-05-17 15:29:18 +0200 | [diff] [blame] | 59 | Name: string(Name), |
Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 60 | Endpoint: k.KubeletDirectory.Plugins.KVM.FullPath(), |
| 61 | SupportedVersions: []string{"v1beta1"}, |
| 62 | }, nil |
| 63 | } |
| 64 | |
| 65 | func (k *Plugin) NotifyRegistrationStatus(ctx context.Context, req *pluginregistration.RegistrationStatus) (*pluginregistration.RegistrationStatusResponse, error) { |
| 66 | if !req.PluginRegistered { |
| 67 | k.logger.Errorf("KVM plugin failed to register: %v", req.Error) |
| 68 | } |
| 69 | return &pluginregistration.RegistrationStatusResponse{}, nil |
| 70 | } |
| 71 | |
| 72 | func (k *Plugin) GetDevicePluginOptions(context.Context, *deviceplugin.Empty) (*deviceplugin.DevicePluginOptions, error) { |
| 73 | return &deviceplugin.DevicePluginOptions{ |
| 74 | GetPreferredAllocationAvailable: false, |
| 75 | PreStartRequired: false, |
| 76 | }, nil |
| 77 | } |
| 78 | |
| 79 | func (k *Plugin) ListAndWatch(req *deviceplugin.Empty, s deviceplugin.DevicePlugin_ListAndWatchServer) error { |
| 80 | var devs []*deviceplugin.Device |
| 81 | |
| 82 | // TODO(T963): Get this value from Kubelet configuration (or something higher-level?) |
| 83 | for i := 0; i < 256; i++ { |
| 84 | devs = append(devs, &deviceplugin.Device{ |
| 85 | ID: fmt.Sprintf("kvm%v", i), |
| 86 | Health: deviceplugin.Healthy, |
| 87 | }) |
| 88 | } |
| 89 | |
| 90 | s.Send(&deviceplugin.ListAndWatchResponse{Devices: devs}) |
| 91 | |
| 92 | <-s.Context().Done() |
| 93 | return nil |
| 94 | } |
| 95 | |
| 96 | func (k *Plugin) Allocate(ctx context.Context, req *deviceplugin.AllocateRequest) (*deviceplugin.AllocateResponse, error) { |
| 97 | var response deviceplugin.AllocateResponse |
| 98 | |
| 99 | for _, req := range req.ContainerRequests { |
| 100 | var devices []*deviceplugin.DeviceSpec |
| 101 | for range req.DevicesIDs { |
| 102 | dev := new(deviceplugin.DeviceSpec) |
| 103 | dev.HostPath = "/dev/kvm" |
| 104 | dev.ContainerPath = "/dev/kvm" |
| 105 | dev.Permissions = "rw" |
| 106 | devices = append(devices, dev) |
| 107 | } |
| 108 | response.ContainerResponses = append(response.ContainerResponses, &deviceplugin.ContainerAllocateResponse{ |
| 109 | Devices: devices}) |
| 110 | } |
| 111 | |
| 112 | return &response, nil |
| 113 | } |
| 114 | |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 115 | // deviceNumberFromString gets a Linux device number from a string containing |
| 116 | // two decimal numbers representing the major and minor device numbers |
| 117 | // separated by a colon. Whitespace is ignored. |
Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 118 | func deviceNumberFromString(s string) (uint64, error) { |
| 119 | kvmDevParts := strings.Split(s, ":") |
| 120 | if len(kvmDevParts) != 2 { |
| 121 | return 0, fmt.Errorf("device file spec contains an invalid number of colons: `%v`", s) |
| 122 | } |
| 123 | major, err := strconv.ParseUint(strings.TrimSpace(kvmDevParts[0]), 10, 32) |
| 124 | if err != nil { |
| 125 | return 0, fmt.Errorf("failed to convert major number to an integer: %w", err) |
| 126 | } |
| 127 | minor, err := strconv.ParseUint(strings.TrimSpace(kvmDevParts[1]), 10, 32) |
| 128 | if err != nil { |
| 129 | return 0, fmt.Errorf("failed to convert minor number to an integer: %w", err) |
| 130 | } |
| 131 | |
| 132 | return unix.Mkdev(uint32(major), uint32(minor)), nil |
| 133 | } |
| 134 | |
| 135 | func (k *Plugin) Run(ctx context.Context) error { |
| 136 | k.logger = supervisor.Logger(ctx) |
| 137 | |
Lorenz Brun | 764a2de | 2021-11-22 16:26:36 +0100 | [diff] [blame] | 138 | l1tfStatus, err := os.ReadFile("/sys/devices/system/cpu/vulnerabilities/l1tf") |
Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 139 | if err != nil && !os.IsNotExist(err) { |
| 140 | return fmt.Errorf("failed to query for CPU vulnerabilities: %v", err) |
| 141 | } |
| 142 | |
| 143 | if bytes.Contains(l1tfStatus, []byte("vulnerable")) { |
| 144 | k.logger.Warning("CPU is vulnerable to L1TF, not exposing KVM.") |
| 145 | supervisor.Signal(ctx, supervisor.SignalHealthy) |
| 146 | supervisor.Signal(ctx, supervisor.SignalDone) |
| 147 | return nil |
| 148 | } |
| 149 | |
Lorenz Brun | 764a2de | 2021-11-22 16:26:36 +0100 | [diff] [blame] | 150 | kvmDevRaw, err := os.ReadFile("/sys/devices/virtual/misc/kvm/dev") |
Lorenz Brun | 4e09035 | 2021-03-17 17:44:41 +0100 | [diff] [blame] | 151 | if err != nil { |
| 152 | k.logger.Warning("KVM is not available. Check firmware settings and CPU.") |
| 153 | supervisor.Signal(ctx, supervisor.SignalHealthy) |
| 154 | supervisor.Signal(ctx, supervisor.SignalDone) |
| 155 | return nil |
| 156 | } |
| 157 | |
| 158 | kvmDevNode, err := deviceNumberFromString(string(kvmDevRaw)) |
| 159 | |
| 160 | err = unix.Mknod("/dev/kvm", 0660, int(kvmDevNode)) |
| 161 | if err != nil && err != unix.EEXIST { |
| 162 | return fmt.Errorf("failed to create KVM device node: %v", err) |
| 163 | } |
| 164 | |
| 165 | pluginListener, err := net.ListenUnix("unix", &net.UnixAddr{Name: k.KubeletDirectory.Plugins.KVM.FullPath(), Net: "unix"}) |
| 166 | if err != nil { |
| 167 | return fmt.Errorf("failed to listen on device plugin socket: %w", err) |
| 168 | } |
| 169 | pluginListener.SetUnlinkOnClose(true) |
| 170 | |
| 171 | pluginServer := grpc.NewServer() |
| 172 | deviceplugin.RegisterDevicePluginServer(pluginServer, k) |
| 173 | if err := supervisor.Run(ctx, "kvm-device", supervisor.GRPCServer(pluginServer, pluginListener, false)); err != nil { |
| 174 | return err |
| 175 | } |
| 176 | |
| 177 | registrationListener, err := net.ListenUnix("unix", &net.UnixAddr{Name: k.KubeletDirectory.PluginsRegistry.KVMReg.FullPath(), Net: "unix"}) |
| 178 | if err != nil { |
| 179 | return fmt.Errorf("failed to listen on registration socket: %w", err) |
| 180 | } |
| 181 | registrationListener.SetUnlinkOnClose(true) |
| 182 | |
| 183 | registrationServer := grpc.NewServer() |
| 184 | pluginregistration.RegisterRegistrationServer(registrationServer, k) |
| 185 | if err := supervisor.Run(ctx, "registration", supervisor.GRPCServer(registrationServer, registrationListener, true)); err != nil { |
| 186 | return err |
| 187 | } |
| 188 | supervisor.Signal(ctx, supervisor.SignalHealthy) |
| 189 | supervisor.Signal(ctx, supervisor.SignalDone) |
| 190 | return nil |
| 191 | } |