blob: 40d54d025bccf6db8a6835f56438956da483d400 [file] [log] [blame]
Lorenz Brun0db90ba2020-04-06 14:04:52 +02001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17package kubernetes
18
19import (
20 "context"
Tim Windelschmidtd5f851b2024-04-23 14:59:37 +020021 "errors"
Lorenz Brun0db90ba2020-04-06 14:04:52 +020022 "fmt"
23 "net"
24 "os"
25 "path/filepath"
26 "regexp"
27
Lorenz Brun0db90ba2020-04-06 14:04:52 +020028 "github.com/container-storage-interface/spec/lib/go/csi"
Lorenz Brun0db90ba2020-04-06 14:04:52 +020029 "golang.org/x/sys/unix"
30 "google.golang.org/grpc"
31 "google.golang.org/grpc/codes"
32 "google.golang.org/grpc/status"
Lorenz Brun65702192023-08-31 16:27:38 +020033 "google.golang.org/protobuf/types/known/wrapperspb"
Lorenz Brun6211e4d2023-11-14 19:09:40 +010034 pluginregistration "k8s.io/kubelet/pkg/apis/pluginregistration/v1"
Lorenz Brun0db90ba2020-04-06 14:04:52 +020035
Serge Bazanski3c5d0632024-09-12 10:49:12 +000036 "source.monogon.dev/go/logging"
Serge Bazanski31370b02021-01-07 16:31:14 +010037 "source.monogon.dev/metropolis/node/core/localstorage"
Tim Windelschmidt9f21f532024-05-07 15:14:20 +020038 "source.monogon.dev/osbase/fsquota"
Tim Windelschmidt9f21f532024-05-07 15:14:20 +020039 "source.monogon.dev/osbase/loop"
40 "source.monogon.dev/osbase/supervisor"
Lorenz Brun0db90ba2020-04-06 14:04:52 +020041)
42
// acceptableNames is the pattern a volume ID has to match before it is used
// as a path component below the volumes directory. It is derived from the
// Kubernetes rules for acceptable names, but capped at 130 characters to stay
// clear of maximum path length limits. Longer names are never provisioned by
// us, so the cap only matters for manually created volumes whose name exceeds
// 130 characters.
var acceptableNames = regexp.MustCompile("^[a-z][a-z0-9-.]{0,128}[a-z0-9]$")
Lorenz Brun0db90ba2020-04-06 14:04:52 +020048
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020049type csiPluginServer struct {
Lorenz Brun37050122021-03-30 14:00:27 +020050 *csi.UnimplementedNodeServer
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020051 KubeletDirectory *localstorage.DataKubernetesKubeletDirectory
52 VolumesDirectory *localstorage.DataVolumesDirectory
Lorenz Brun0db90ba2020-04-06 14:04:52 +020053
Serge Bazanski3c5d0632024-09-12 10:49:12 +000054 logger logging.Leveled
Lorenz Brun0db90ba2020-04-06 14:04:52 +020055}
56
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020057func (s *csiPluginServer) Run(ctx context.Context) error {
58 s.logger = supervisor.Logger(ctx)
Lorenz Brun0db90ba2020-04-06 14:04:52 +020059
Lorenz Brun4599aa22023-06-28 13:09:32 +020060 // Try to remove socket if an unclean shutdown happened.
61 os.Remove(s.KubeletDirectory.Plugins.VFS.FullPath())
62
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020063 pluginListener, err := net.ListenUnix("unix", &net.UnixAddr{Name: s.KubeletDirectory.Plugins.VFS.FullPath(), Net: "unix"})
64 if err != nil {
65 return fmt.Errorf("failed to listen on CSI socket: %w", err)
Lorenz Brun0db90ba2020-04-06 14:04:52 +020066 }
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020067
68 pluginServer := grpc.NewServer()
69 csi.RegisterIdentityServer(pluginServer, s)
70 csi.RegisterNodeServer(pluginServer, s)
Serge Bazanski216fe7b2021-05-21 18:36:16 +020071 // Enable graceful shutdown since we don't have long-running RPCs and most
72 // of them shouldn't and can't be cancelled anyways.
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020073 if err := supervisor.Run(ctx, "csi-node", supervisor.GRPCServer(pluginServer, pluginListener, true)); err != nil {
74 return err
75 }
76
Lorenz Brun1dd0c652024-02-20 18:45:06 +010077 r := pluginRegistrationServer{
78 regErr: make(chan error, 1),
79 KubeletDirectory: s.KubeletDirectory,
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020080 }
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020081
Lorenz Brun1dd0c652024-02-20 18:45:06 +010082 if err := supervisor.Run(ctx, "registration", r.Run); err != nil {
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020083 return err
84 }
85 supervisor.Signal(ctx, supervisor.SignalHealthy)
86 supervisor.Signal(ctx, supervisor.SignalDone)
87 return nil
Lorenz Brun0db90ba2020-04-06 14:04:52 +020088}
89
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020090func (s *csiPluginServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +020091 if !acceptableNames.MatchString(req.VolumeId) {
92 return nil, status.Error(codes.InvalidArgument, "invalid characters in volume id")
93 }
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020094
95 // TODO(q3k): move this logic to localstorage?
96 volumePath := filepath.Join(s.VolumesDirectory.FullPath(), req.VolumeId)
97
Lorenz Brun0db90ba2020-04-06 14:04:52 +020098 switch req.VolumeCapability.AccessMode.Mode {
99 case csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER:
100 case csi.VolumeCapability_AccessMode_SINGLE_NODE_READER_ONLY:
101 default:
102 return nil, status.Error(codes.InvalidArgument, "unsupported access mode")
103 }
104 switch req.VolumeCapability.AccessType.(type) {
105 case *csi.VolumeCapability_Mount:
Jan Schärbe70c922024-11-21 11:16:03 +0100106 if err := os.MkdirAll(req.TargetPath, 0700); err != nil {
107 return nil, status.Errorf(codes.Internal, "unable to create requested target path: %v", err)
108 }
109
Tim Windelschmidta8938da2024-09-13 22:34:01 +0200110 var mountFlags uintptr = unix.MS_BIND
111 if req.Readonly {
112 mountFlags |= unix.MS_RDONLY
113 }
114
115 err := unix.Mount(volumePath, req.TargetPath, "", mountFlags, "")
Lorenz Brun37050122021-03-30 14:00:27 +0200116 switch {
Tim Windelschmidtd5f851b2024-04-23 14:59:37 +0200117 case errors.Is(err, unix.ENOENT):
Lorenz Brun37050122021-03-30 14:00:27 +0200118 return nil, status.Error(codes.NotFound, "volume not found")
119 case err != nil:
120 return nil, status.Errorf(codes.Unavailable, "failed to bind-mount volume: %v", err)
121 }
122
Tim Windelschmidta8938da2024-09-13 22:34:01 +0200123 flagSet := make(map[string]bool)
124 for _, flag := range req.VolumeCapability.GetMount().GetMountFlags() {
125 flagSet[flag] = true
126 }
127
128 flagPairs := map[string]string{
129 "exec": "noexec",
130 "dev": "nodev",
131 "suid": "nosuid",
132 }
133 for pFlag, nFlag := range flagPairs {
134 if flagSet[pFlag] && flagSet[nFlag] {
135 return nil, status.Errorf(codes.InvalidArgument, "contradictory flag pair found. can't have both %q and %q set", pFlag, nFlag)
136 } else if !flagSet[pFlag] && !flagSet[nFlag] {
137 // If neither of a flag pair is found, add the negative flag as default.
138 flagSet[nFlag] = true
Lorenz Brun37050122021-03-30 14:00:27 +0200139 }
140 }
Tim Windelschmidta8938da2024-09-13 22:34:01 +0200141
142 var mountAttr unix.MountAttr
143 for flag := range flagSet {
144 switch flag {
145 case "exec":
Jan Schär652c2ad2024-11-19 17:40:50 +0100146 mountAttr.Attr_clr |= unix.MOUNT_ATTR_NOEXEC
Tim Windelschmidta8938da2024-09-13 22:34:01 +0200147 case "noexec":
Jan Schär652c2ad2024-11-19 17:40:50 +0100148 mountAttr.Attr_set |= unix.MOUNT_ATTR_NOEXEC
Tim Windelschmidta8938da2024-09-13 22:34:01 +0200149 case "dev":
Jan Schär652c2ad2024-11-19 17:40:50 +0100150 mountAttr.Attr_clr |= unix.MOUNT_ATTR_NODEV
Tim Windelschmidta8938da2024-09-13 22:34:01 +0200151 case "nodev":
Jan Schär652c2ad2024-11-19 17:40:50 +0100152 mountAttr.Attr_set |= unix.MOUNT_ATTR_NODEV
Tim Windelschmidta8938da2024-09-13 22:34:01 +0200153 case "suid":
Jan Schär652c2ad2024-11-19 17:40:50 +0100154 mountAttr.Attr_clr |= unix.MOUNT_ATTR_NOSUID
Tim Windelschmidta8938da2024-09-13 22:34:01 +0200155 case "nosuid":
Jan Schär652c2ad2024-11-19 17:40:50 +0100156 mountAttr.Attr_set |= unix.MOUNT_ATTR_NOSUID
Tim Windelschmidta8938da2024-09-13 22:34:01 +0200157 default:
158 return nil, status.Errorf(codes.InvalidArgument, "unknown mount flag: %s", flag)
159 }
160 }
161
162 if err := unix.MountSetattr(-1, req.TargetPath, 0, &mountAttr); err != nil {
163 _ = unix.Unmount(req.TargetPath, 0) // Best-effort
164 return nil, status.Errorf(codes.Internal, "unable to set mount attributes: %v", err)
165 }
Lorenz Brun37050122021-03-30 14:00:27 +0200166 case *csi.VolumeCapability_Block:
167 f, err := os.OpenFile(volumePath, os.O_RDWR, 0)
168 if err != nil {
169 return nil, status.Errorf(codes.Unavailable, "failed to open block volume: %v", err)
170 }
171 defer f.Close()
172 var flags uint32 = loop.FlagDirectIO
173 if req.Readonly {
174 flags |= loop.FlagReadOnly
175 }
176 loopdev, err := loop.Create(f, loop.Config{Flags: flags})
177 if err != nil {
178 return nil, status.Errorf(codes.Unavailable, "failed to create loop device: %v", err)
179 }
180 loopdevNum, err := loopdev.Dev()
181 if err != nil {
182 loopdev.Remove()
183 return nil, status.Errorf(codes.Internal, "device number not available: %v", err)
184 }
185 if err := unix.Mknod(req.TargetPath, unix.S_IFBLK|0640, int(loopdevNum)); err != nil {
186 loopdev.Remove()
187 return nil, status.Errorf(codes.Unavailable, "failed to create device node at target path: %v", err)
188 }
189 loopdev.Close()
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200190 default:
191 return nil, status.Error(codes.InvalidArgument, "unsupported access type")
192 }
193
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200194 return &csi.NodePublishVolumeResponse{}, nil
195}
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200196
Lorenz Brun37050122021-03-30 14:00:27 +0200197func (s *csiPluginServer) NodeUnpublishVolume(ctx context.Context, req *csi.NodeUnpublishVolumeRequest) (*csi.NodeUnpublishVolumeResponse, error) {
198 loopdev, err := loop.Open(req.TargetPath)
199 if err == nil {
200 defer loopdev.Close()
201 // We have a block device
202 if err := loopdev.Remove(); err != nil {
203 return nil, status.Errorf(codes.Unavailable, "failed to remove loop device: %v", err)
204 }
205 if err := os.Remove(req.TargetPath); err != nil && !os.IsNotExist(err) {
206 return nil, status.Errorf(codes.Unavailable, "failed to remove device inode: %v", err)
207 }
208 return &csi.NodeUnpublishVolumeResponse{}, nil
209 }
210 // Otherwise try a normal unmount
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200211 if err := unix.Unmount(req.TargetPath, 0); err != nil {
212 return nil, status.Errorf(codes.Unavailable, "failed to unmount volume: %v", err)
213 }
214 return &csi.NodeUnpublishVolumeResponse{}, nil
215}
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200216
217func (*csiPluginServer) NodeGetVolumeStats(ctx context.Context, req *csi.NodeGetVolumeStatsRequest) (*csi.NodeGetVolumeStatsResponse, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200218 quota, err := fsquota.GetQuota(req.VolumePath)
219 if os.IsNotExist(err) {
220 return nil, status.Error(codes.NotFound, "volume does not exist at this path")
221 } else if err != nil {
222 return nil, status.Errorf(codes.Unavailable, "failed to get quota: %v", err)
223 }
224
225 return &csi.NodeGetVolumeStatsResponse{
226 Usage: []*csi.VolumeUsage{
227 {
228 Total: int64(quota.Bytes),
229 Unit: csi.VolumeUsage_BYTES,
230 Used: int64(quota.BytesUsed),
231 Available: int64(quota.Bytes - quota.BytesUsed),
232 },
233 {
234 Total: int64(quota.Inodes),
235 Unit: csi.VolumeUsage_INODES,
236 Used: int64(quota.InodesUsed),
237 Available: int64(quota.Inodes - quota.InodesUsed),
238 },
239 },
240 }, nil
241}
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200242
Lorenz Brun37050122021-03-30 14:00:27 +0200243func (s *csiPluginServer) NodeExpandVolume(ctx context.Context, req *csi.NodeExpandVolumeRequest) (*csi.NodeExpandVolumeResponse, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200244 if req.CapacityRange.LimitBytes <= 0 {
245 return nil, status.Error(codes.InvalidArgument, "invalid expanded volume size: at or below zero bytes")
246 }
Lorenz Brun37050122021-03-30 14:00:27 +0200247 loopdev, err := loop.Open(req.VolumePath)
248 if err == nil {
249 defer loopdev.Close()
250 volumePath := filepath.Join(s.VolumesDirectory.FullPath(), req.VolumeId)
251 imageFile, err := os.OpenFile(volumePath, os.O_RDWR, 0)
252 if err != nil {
253 return nil, status.Errorf(codes.Unavailable, "failed to open block volume backing file: %v", err)
254 }
255 defer imageFile.Close()
256 if err := unix.Fallocate(int(imageFile.Fd()), 0, 0, req.CapacityRange.LimitBytes); err != nil {
257 return nil, status.Errorf(codes.Unavailable, "failed to expand volume using fallocate: %v", err)
258 }
259 if err := loopdev.RefreshSize(); err != nil {
260 return nil, status.Errorf(codes.Unavailable, "failed to refresh loop device size: %v", err)
261 }
262 return &csi.NodeExpandVolumeResponse{CapacityBytes: req.CapacityRange.LimitBytes}, nil
263 }
Lorenz Brun397f7ea2024-08-20 21:26:06 +0200264 if err := fsquota.SetQuota(req.VolumePath, uint64(req.CapacityRange.LimitBytes), uint64(req.CapacityRange.LimitBytes)/inodeCapacityRatio); err != nil {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200265 return nil, status.Errorf(codes.Unavailable, "failed to update quota: %v", err)
266 }
267 return &csi.NodeExpandVolumeResponse{CapacityBytes: req.CapacityRange.LimitBytes}, nil
268}
269
270func rpcCapability(cap csi.NodeServiceCapability_RPC_Type) *csi.NodeServiceCapability {
271 return &csi.NodeServiceCapability{
272 Type: &csi.NodeServiceCapability_Rpc{
273 Rpc: &csi.NodeServiceCapability_RPC{Type: cap},
274 },
275 }
276}
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200277
278func (*csiPluginServer) NodeGetCapabilities(ctx context.Context, req *csi.NodeGetCapabilitiesRequest) (*csi.NodeGetCapabilitiesResponse, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200279 return &csi.NodeGetCapabilitiesResponse{
280 Capabilities: []*csi.NodeServiceCapability{
281 rpcCapability(csi.NodeServiceCapability_RPC_EXPAND_VOLUME),
282 rpcCapability(csi.NodeServiceCapability_RPC_GET_VOLUME_STATS),
283 },
284 }, nil
285}
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200286
287func (*csiPluginServer) NodeGetInfo(ctx context.Context, req *csi.NodeGetInfoRequest) (*csi.NodeGetInfoResponse, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200288 hostname, err := os.Hostname()
289 if err != nil {
290 return nil, status.Errorf(codes.Unavailable, "failed to get node identity: %v", err)
291 }
292 return &csi.NodeGetInfoResponse{
293 NodeId: hostname,
294 }, nil
295}
296
297// CSI Identity endpoints
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200298func (*csiPluginServer) GetPluginInfo(ctx context.Context, req *csi.GetPluginInfoRequest) (*csi.GetPluginInfoResponse, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200299 return &csi.GetPluginInfoResponse{
Serge Bazanski662b5b32020-12-21 13:49:00 +0100300 Name: "dev.monogon.metropolis.vfs",
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200301 VendorVersion: "0.0.1", // TODO(lorenz): Maybe stamp?
302 }, nil
303}
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200304
305func (*csiPluginServer) GetPluginCapabilities(ctx context.Context, req *csi.GetPluginCapabilitiesRequest) (*csi.GetPluginCapabilitiesResponse, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200306 return &csi.GetPluginCapabilitiesResponse{
307 Capabilities: []*csi.PluginCapability{
308 {
309 Type: &csi.PluginCapability_VolumeExpansion_{
310 VolumeExpansion: &csi.PluginCapability_VolumeExpansion{
311 Type: csi.PluginCapability_VolumeExpansion_ONLINE,
312 },
313 },
314 },
315 },
316 }, nil
317}
318
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200319func (s *csiPluginServer) Probe(ctx context.Context, req *csi.ProbeRequest) (*csi.ProbeResponse, error) {
Lorenz Brun65702192023-08-31 16:27:38 +0200320 return &csi.ProbeResponse{Ready: &wrapperspb.BoolValue{Value: true}}, nil
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200321}
322
Lorenz Brun1dd0c652024-02-20 18:45:06 +0100323// pluginRegistrationServer implements the pluginregistration.Registration
324// service. It has a special restart mechanic to accomodate a design issue
325// in Kubelet which requires it to remove and recreate its gRPC socket for
326// every new registration attempt.
327type pluginRegistrationServer struct {
328 // regErr has a buffer of 1, so that at least one error can always be
329 // sent into it in a non-blocking way. There is a race if
330 // NotifyRegistrationStatus is called twice with an error as the buffered
331 // item might have been received but not fully processed yet.
332 // As distinguishing between calls on different socket iterations is
333 // hard, doing it this way errs on the side of caution, i.e.
334 // generating too many restarts. This way is better as if we miss one
335 // such error the registration will not be available until the node
336 // gets restarted.
337 regErr chan error
338
339 KubeletDirectory *localstorage.DataKubernetesKubeletDirectory
340}
341
342func (r *pluginRegistrationServer) Run(ctx context.Context) error {
343 // Remove registration socket if it exists
344 os.Remove(r.KubeletDirectory.PluginsRegistry.VFSReg.FullPath())
345
346 registrationListener, err := net.ListenUnix("unix", &net.UnixAddr{Name: r.KubeletDirectory.PluginsRegistry.VFSReg.FullPath(), Net: "unix"})
347 if err != nil {
348 return fmt.Errorf("failed to listen on CSI registration socket: %w", err)
349 }
350 defer registrationListener.Close()
351
352 grpcS := grpc.NewServer()
353 pluginregistration.RegisterRegistrationServer(grpcS, r)
354
355 supervisor.Run(ctx, "rpc", supervisor.GRPCServer(grpcS, registrationListener, true))
356 supervisor.Signal(ctx, supervisor.SignalHealthy)
357 select {
358 case <-ctx.Done():
359 return ctx.Err()
360 case err = <-r.regErr:
361 return err
362 }
363}
364
365func (r *pluginRegistrationServer) GetInfo(ctx context.Context, req *pluginregistration.InfoRequest) (*pluginregistration.PluginInfo, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200366 return &pluginregistration.PluginInfo{
Lorenz Brun4e090352021-03-17 17:44:41 +0100367 Type: pluginregistration.CSIPlugin,
Serge Bazanski662b5b32020-12-21 13:49:00 +0100368 Name: "dev.monogon.metropolis.vfs",
Lorenz Brun1dd0c652024-02-20 18:45:06 +0100369 Endpoint: r.KubeletDirectory.Plugins.VFS.FullPath(),
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200370 SupportedVersions: []string{"1.2"}, // Keep in sync with container-storage-interface/spec package version
371 }, nil
372}
373
Lorenz Brun1dd0c652024-02-20 18:45:06 +0100374func (r *pluginRegistrationServer) NotifyRegistrationStatus(ctx context.Context, req *pluginregistration.RegistrationStatus) (*pluginregistration.RegistrationStatusResponse, error) {
375 if !req.PluginRegistered {
376 select {
377 case r.regErr <- fmt.Errorf("registration failed: %v", req.Error):
378 default:
379 }
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200380 }
381 return &pluginregistration.RegistrationStatusResponse{}, nil
382}