blob: c9a6a793d2a2948f7b44c93d2ec1d5b47840c00b [file] [log] [blame]
Lorenz Brun4e090352021-03-17 17:44:41 +01001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
// Package kvmdevice implements a Kubernetes device plugin for the virtual KVM
// device. Using the device plugin API allows us to take advantage of the
// scheduler to locate pods on machines eligible for KVM and also allows
// granular access control to KVM using quotas instead of needing privileged
// access.
// Since KVM devices are virtual, this plugin emulates a huge number of them so
// that we never run out.
Lorenz Brun4e090352021-03-17 17:44:41 +010024package kvmdevice
25
26import (
27 "bytes"
28 "context"
29 "fmt"
Lorenz Brun4e090352021-03-17 17:44:41 +010030 "net"
31 "os"
32 "strconv"
33 "strings"
34
35 "golang.org/x/sys/unix"
36 "google.golang.org/grpc"
Lorenz Brun99d210d2021-05-17 15:29:18 +020037 corev1 "k8s.io/api/core/v1"
Lorenz Brun4e090352021-03-17 17:44:41 +010038 deviceplugin "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
39 "k8s.io/kubelet/pkg/apis/pluginregistration/v1"
40
41 "source.monogon.dev/metropolis/node/core/localstorage"
42 "source.monogon.dev/metropolis/pkg/logtree"
43 "source.monogon.dev/metropolis/pkg/supervisor"
44)
45
Lorenz Brun99d210d2021-05-17 15:29:18 +020046// Name is the name of the KVM devices this plugin exposes
47var Name corev1.ResourceName = "devices.monogon.dev/kvm"
48
Lorenz Brun4e090352021-03-17 17:44:41 +010049type Plugin struct {
50 *deviceplugin.UnimplementedDevicePluginServer
51 KubeletDirectory *localstorage.DataKubernetesKubeletDirectory
52
53 logger logtree.LeveledLogger
54}
55
56func (k *Plugin) GetInfo(context.Context, *pluginregistration.InfoRequest) (*pluginregistration.PluginInfo, error) {
57 return &pluginregistration.PluginInfo{
58 Type: pluginregistration.DevicePlugin,
Lorenz Brun99d210d2021-05-17 15:29:18 +020059 Name: string(Name),
Lorenz Brun4e090352021-03-17 17:44:41 +010060 Endpoint: k.KubeletDirectory.Plugins.KVM.FullPath(),
61 SupportedVersions: []string{"v1beta1"},
62 }, nil
63}
64
65func (k *Plugin) NotifyRegistrationStatus(ctx context.Context, req *pluginregistration.RegistrationStatus) (*pluginregistration.RegistrationStatusResponse, error) {
66 if !req.PluginRegistered {
67 k.logger.Errorf("KVM plugin failed to register: %v", req.Error)
68 }
69 return &pluginregistration.RegistrationStatusResponse{}, nil
70}
71
72func (k *Plugin) GetDevicePluginOptions(context.Context, *deviceplugin.Empty) (*deviceplugin.DevicePluginOptions, error) {
73 return &deviceplugin.DevicePluginOptions{
74 GetPreferredAllocationAvailable: false,
75 PreStartRequired: false,
76 }, nil
77}
78
79func (k *Plugin) ListAndWatch(req *deviceplugin.Empty, s deviceplugin.DevicePlugin_ListAndWatchServer) error {
80 var devs []*deviceplugin.Device
81
82 // TODO(T963): Get this value from Kubelet configuration (or something higher-level?)
83 for i := 0; i < 256; i++ {
84 devs = append(devs, &deviceplugin.Device{
85 ID: fmt.Sprintf("kvm%v", i),
86 Health: deviceplugin.Healthy,
87 })
88 }
89
90 s.Send(&deviceplugin.ListAndWatchResponse{Devices: devs})
91
92 <-s.Context().Done()
93 return nil
94}
95
96func (k *Plugin) Allocate(ctx context.Context, req *deviceplugin.AllocateRequest) (*deviceplugin.AllocateResponse, error) {
97 var response deviceplugin.AllocateResponse
98
99 for _, req := range req.ContainerRequests {
100 var devices []*deviceplugin.DeviceSpec
101 for range req.DevicesIDs {
102 dev := new(deviceplugin.DeviceSpec)
103 dev.HostPath = "/dev/kvm"
104 dev.ContainerPath = "/dev/kvm"
105 dev.Permissions = "rw"
106 devices = append(devices, dev)
107 }
108 response.ContainerResponses = append(response.ContainerResponses, &deviceplugin.ContainerAllocateResponse{
109 Devices: devices})
110 }
111
112 return &response, nil
113}
114
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200115// deviceNumberFromString gets a Linux device number from a string containing
116// two decimal numbers representing the major and minor device numbers
117// separated by a colon. Whitespace is ignored.
Lorenz Brun4e090352021-03-17 17:44:41 +0100118func deviceNumberFromString(s string) (uint64, error) {
119 kvmDevParts := strings.Split(s, ":")
120 if len(kvmDevParts) != 2 {
121 return 0, fmt.Errorf("device file spec contains an invalid number of colons: `%v`", s)
122 }
123 major, err := strconv.ParseUint(strings.TrimSpace(kvmDevParts[0]), 10, 32)
124 if err != nil {
125 return 0, fmt.Errorf("failed to convert major number to an integer: %w", err)
126 }
127 minor, err := strconv.ParseUint(strings.TrimSpace(kvmDevParts[1]), 10, 32)
128 if err != nil {
129 return 0, fmt.Errorf("failed to convert minor number to an integer: %w", err)
130 }
131
132 return unix.Mkdev(uint32(major), uint32(minor)), nil
133}
134
135func (k *Plugin) Run(ctx context.Context) error {
136 k.logger = supervisor.Logger(ctx)
137
Lorenz Brun764a2de2021-11-22 16:26:36 +0100138 l1tfStatus, err := os.ReadFile("/sys/devices/system/cpu/vulnerabilities/l1tf")
Lorenz Brun4e090352021-03-17 17:44:41 +0100139 if err != nil && !os.IsNotExist(err) {
140 return fmt.Errorf("failed to query for CPU vulnerabilities: %v", err)
141 }
142
143 if bytes.Contains(l1tfStatus, []byte("vulnerable")) {
144 k.logger.Warning("CPU is vulnerable to L1TF, not exposing KVM.")
145 supervisor.Signal(ctx, supervisor.SignalHealthy)
146 supervisor.Signal(ctx, supervisor.SignalDone)
147 return nil
148 }
149
Lorenz Brun764a2de2021-11-22 16:26:36 +0100150 kvmDevRaw, err := os.ReadFile("/sys/devices/virtual/misc/kvm/dev")
Lorenz Brun4e090352021-03-17 17:44:41 +0100151 if err != nil {
152 k.logger.Warning("KVM is not available. Check firmware settings and CPU.")
153 supervisor.Signal(ctx, supervisor.SignalHealthy)
154 supervisor.Signal(ctx, supervisor.SignalDone)
155 return nil
156 }
157
158 kvmDevNode, err := deviceNumberFromString(string(kvmDevRaw))
159
160 err = unix.Mknod("/dev/kvm", 0660, int(kvmDevNode))
161 if err != nil && err != unix.EEXIST {
162 return fmt.Errorf("failed to create KVM device node: %v", err)
163 }
164
165 pluginListener, err := net.ListenUnix("unix", &net.UnixAddr{Name: k.KubeletDirectory.Plugins.KVM.FullPath(), Net: "unix"})
166 if err != nil {
167 return fmt.Errorf("failed to listen on device plugin socket: %w", err)
168 }
169 pluginListener.SetUnlinkOnClose(true)
170
171 pluginServer := grpc.NewServer()
172 deviceplugin.RegisterDevicePluginServer(pluginServer, k)
173 if err := supervisor.Run(ctx, "kvm-device", supervisor.GRPCServer(pluginServer, pluginListener, false)); err != nil {
174 return err
175 }
176
177 registrationListener, err := net.ListenUnix("unix", &net.UnixAddr{Name: k.KubeletDirectory.PluginsRegistry.KVMReg.FullPath(), Net: "unix"})
178 if err != nil {
179 return fmt.Errorf("failed to listen on registration socket: %w", err)
180 }
181 registrationListener.SetUnlinkOnClose(true)
182
183 registrationServer := grpc.NewServer()
184 pluginregistration.RegisterRegistrationServer(registrationServer, k)
185 if err := supervisor.Run(ctx, "registration", supervisor.GRPCServer(registrationServer, registrationListener, true)); err != nil {
186 return err
187 }
188 supervisor.Signal(ctx, supervisor.SignalHealthy)
189 supervisor.Signal(ctx, supervisor.SignalDone)
190 return nil
191}