blob: d29112f9bc144b02571cf840d0172abfc03b2eac [file] [log] [blame]
Lorenz Brun4e090352021-03-17 17:44:41 +01001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
Serge Bazanski216fe7b2021-05-21 18:36:16 +020017// Package kvmdevice implements a Kubernetes device plugin for the virtual KVM
18// device. Using the device plugin API allows us to take advantage of the
19// scheduler to locate pods on machines eligible for KVM and also allows
20// granular access control to KVM using quotas instead of needing privileged
21// access.
22// Since KVM devices are virtual, this plugin emulates a huge number of them so
23// that we never run out.
Lorenz Brun4e090352021-03-17 17:44:41 +010024package kvmdevice
25
26import (
27 "bytes"
28 "context"
Tim Windelschmidtd5f851b2024-04-23 14:59:37 +020029 "errors"
Lorenz Brun4e090352021-03-17 17:44:41 +010030 "fmt"
Lorenz Brun4e090352021-03-17 17:44:41 +010031 "net"
32 "os"
33 "strconv"
34 "strings"
35
36 "golang.org/x/sys/unix"
37 "google.golang.org/grpc"
Lorenz Brun99d210d2021-05-17 15:29:18 +020038 corev1 "k8s.io/api/core/v1"
Lorenz Brun4e090352021-03-17 17:44:41 +010039 deviceplugin "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
Lorenz Brun6211e4d2023-11-14 19:09:40 +010040 pluginregistration "k8s.io/kubelet/pkg/apis/pluginregistration/v1"
Lorenz Brun4e090352021-03-17 17:44:41 +010041
Serge Bazanski3c5d0632024-09-12 10:49:12 +000042 "source.monogon.dev/go/logging"
Lorenz Brun4e090352021-03-17 17:44:41 +010043 "source.monogon.dev/metropolis/node/core/localstorage"
Tim Windelschmidt9f21f532024-05-07 15:14:20 +020044 "source.monogon.dev/osbase/supervisor"
Lorenz Brun4e090352021-03-17 17:44:41 +010045)
46
Lorenz Brun99d210d2021-05-17 15:29:18 +020047// Name is the name of the KVM devices this plugin exposes
48var Name corev1.ResourceName = "devices.monogon.dev/kvm"
49
Lorenz Brun4e090352021-03-17 17:44:41 +010050type Plugin struct {
51 *deviceplugin.UnimplementedDevicePluginServer
52 KubeletDirectory *localstorage.DataKubernetesKubeletDirectory
53
Serge Bazanski3c5d0632024-09-12 10:49:12 +000054 logger logging.Leveled
Lorenz Brun4e090352021-03-17 17:44:41 +010055}
56
57func (k *Plugin) GetInfo(context.Context, *pluginregistration.InfoRequest) (*pluginregistration.PluginInfo, error) {
58 return &pluginregistration.PluginInfo{
59 Type: pluginregistration.DevicePlugin,
Lorenz Brun99d210d2021-05-17 15:29:18 +020060 Name: string(Name),
Lorenz Brun4e090352021-03-17 17:44:41 +010061 Endpoint: k.KubeletDirectory.Plugins.KVM.FullPath(),
62 SupportedVersions: []string{"v1beta1"},
63 }, nil
64}
65
66func (k *Plugin) NotifyRegistrationStatus(ctx context.Context, req *pluginregistration.RegistrationStatus) (*pluginregistration.RegistrationStatusResponse, error) {
67 if !req.PluginRegistered {
68 k.logger.Errorf("KVM plugin failed to register: %v", req.Error)
69 }
70 return &pluginregistration.RegistrationStatusResponse{}, nil
71}
72
73func (k *Plugin) GetDevicePluginOptions(context.Context, *deviceplugin.Empty) (*deviceplugin.DevicePluginOptions, error) {
74 return &deviceplugin.DevicePluginOptions{
75 GetPreferredAllocationAvailable: false,
76 PreStartRequired: false,
77 }, nil
78}
79
80func (k *Plugin) ListAndWatch(req *deviceplugin.Empty, s deviceplugin.DevicePlugin_ListAndWatchServer) error {
81 var devs []*deviceplugin.Device
82
83 // TODO(T963): Get this value from Kubelet configuration (or something higher-level?)
84 for i := 0; i < 256; i++ {
85 devs = append(devs, &deviceplugin.Device{
86 ID: fmt.Sprintf("kvm%v", i),
87 Health: deviceplugin.Healthy,
88 })
89 }
90
91 s.Send(&deviceplugin.ListAndWatchResponse{Devices: devs})
92
93 <-s.Context().Done()
94 return nil
95}
96
97func (k *Plugin) Allocate(ctx context.Context, req *deviceplugin.AllocateRequest) (*deviceplugin.AllocateResponse, error) {
98 var response deviceplugin.AllocateResponse
99
100 for _, req := range req.ContainerRequests {
101 var devices []*deviceplugin.DeviceSpec
102 for range req.DevicesIDs {
103 dev := new(deviceplugin.DeviceSpec)
104 dev.HostPath = "/dev/kvm"
105 dev.ContainerPath = "/dev/kvm"
106 dev.Permissions = "rw"
107 devices = append(devices, dev)
108 }
109 response.ContainerResponses = append(response.ContainerResponses, &deviceplugin.ContainerAllocateResponse{
110 Devices: devices})
111 }
112
113 return &response, nil
114}
115
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200116// deviceNumberFromString gets a Linux device number from a string containing
117// two decimal numbers representing the major and minor device numbers
118// separated by a colon. Whitespace is ignored.
Lorenz Brun4e090352021-03-17 17:44:41 +0100119func deviceNumberFromString(s string) (uint64, error) {
120 kvmDevParts := strings.Split(s, ":")
121 if len(kvmDevParts) != 2 {
122 return 0, fmt.Errorf("device file spec contains an invalid number of colons: `%v`", s)
123 }
124 major, err := strconv.ParseUint(strings.TrimSpace(kvmDevParts[0]), 10, 32)
125 if err != nil {
126 return 0, fmt.Errorf("failed to convert major number to an integer: %w", err)
127 }
128 minor, err := strconv.ParseUint(strings.TrimSpace(kvmDevParts[1]), 10, 32)
129 if err != nil {
130 return 0, fmt.Errorf("failed to convert minor number to an integer: %w", err)
131 }
132
133 return unix.Mkdev(uint32(major), uint32(minor)), nil
134}
135
136func (k *Plugin) Run(ctx context.Context) error {
137 k.logger = supervisor.Logger(ctx)
138
Lorenz Brun764a2de2021-11-22 16:26:36 +0100139 l1tfStatus, err := os.ReadFile("/sys/devices/system/cpu/vulnerabilities/l1tf")
Lorenz Brun4e090352021-03-17 17:44:41 +0100140 if err != nil && !os.IsNotExist(err) {
Tim Windelschmidt5f1a7de2024-09-19 02:00:14 +0200141 return fmt.Errorf("failed to query for CPU vulnerabilities: %w", err)
Lorenz Brun4e090352021-03-17 17:44:41 +0100142 }
143
144 if bytes.Contains(l1tfStatus, []byte("vulnerable")) {
145 k.logger.Warning("CPU is vulnerable to L1TF, not exposing KVM.")
146 supervisor.Signal(ctx, supervisor.SignalHealthy)
147 supervisor.Signal(ctx, supervisor.SignalDone)
148 return nil
149 }
150
Lorenz Brun764a2de2021-11-22 16:26:36 +0100151 kvmDevRaw, err := os.ReadFile("/sys/devices/virtual/misc/kvm/dev")
Lorenz Brun4e090352021-03-17 17:44:41 +0100152 if err != nil {
153 k.logger.Warning("KVM is not available. Check firmware settings and CPU.")
154 supervisor.Signal(ctx, supervisor.SignalHealthy)
155 supervisor.Signal(ctx, supervisor.SignalDone)
Tim Windelschmidt6e5b8a52024-04-17 02:34:07 +0200156 //nolint:returnerrcheck
Lorenz Brun4e090352021-03-17 17:44:41 +0100157 return nil
158 }
159
160 kvmDevNode, err := deviceNumberFromString(string(kvmDevRaw))
Tim Windelschmidt096654a2024-04-18 23:10:19 +0200161 if err != nil {
162 return fmt.Errorf("failed to parse KVM device node: %w", err)
163 }
Lorenz Brun4e090352021-03-17 17:44:41 +0100164
165 err = unix.Mknod("/dev/kvm", 0660, int(kvmDevNode))
Lorenz Brun2d83a122024-05-06 14:38:32 +0200166 if err != nil && !errors.Is(err, unix.EEXIST) {
Tim Windelschmidt5f1a7de2024-09-19 02:00:14 +0200167 return fmt.Errorf("failed to create KVM device node: %w", err)
Lorenz Brun4e090352021-03-17 17:44:41 +0100168 }
169
Lorenz Brun4599aa22023-06-28 13:09:32 +0200170 // Try to remove socket if an unclean shutdown happened
171 os.Remove(k.KubeletDirectory.Plugins.KVM.FullPath())
172
Lorenz Brun4e090352021-03-17 17:44:41 +0100173 pluginListener, err := net.ListenUnix("unix", &net.UnixAddr{Name: k.KubeletDirectory.Plugins.KVM.FullPath(), Net: "unix"})
174 if err != nil {
175 return fmt.Errorf("failed to listen on device plugin socket: %w", err)
176 }
Lorenz Brun4e090352021-03-17 17:44:41 +0100177
178 pluginServer := grpc.NewServer()
179 deviceplugin.RegisterDevicePluginServer(pluginServer, k)
180 if err := supervisor.Run(ctx, "kvm-device", supervisor.GRPCServer(pluginServer, pluginListener, false)); err != nil {
181 return err
182 }
183
Lorenz Brun4599aa22023-06-28 13:09:32 +0200184 // Try to remove socket if an unclean shutdown happened
185 os.Remove(k.KubeletDirectory.PluginsRegistry.KVMReg.FullPath())
186
Lorenz Brun4e090352021-03-17 17:44:41 +0100187 registrationListener, err := net.ListenUnix("unix", &net.UnixAddr{Name: k.KubeletDirectory.PluginsRegistry.KVMReg.FullPath(), Net: "unix"})
188 if err != nil {
189 return fmt.Errorf("failed to listen on registration socket: %w", err)
190 }
Lorenz Brun4e090352021-03-17 17:44:41 +0100191
192 registrationServer := grpc.NewServer()
193 pluginregistration.RegisterRegistrationServer(registrationServer, k)
194 if err := supervisor.Run(ctx, "registration", supervisor.GRPCServer(registrationServer, registrationListener, true)); err != nil {
195 return err
196 }
197 supervisor.Signal(ctx, supervisor.SignalHealthy)
198 supervisor.Signal(ctx, supervisor.SignalDone)
199 return nil
200}