blob: f7ff00a301c9a8dbf0afd8d90c458234c7874ce1 [file] [log] [blame]
Lorenz Brun0db90ba2020-04-06 14:04:52 +02001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17package kubernetes
18
19import (
20 "context"
Tim Windelschmidtd5f851b2024-04-23 14:59:37 +020021 "errors"
Lorenz Brun0db90ba2020-04-06 14:04:52 +020022 "fmt"
23 "net"
24 "os"
25 "path/filepath"
26 "regexp"
27
Lorenz Brun0db90ba2020-04-06 14:04:52 +020028 "github.com/container-storage-interface/spec/lib/go/csi"
Lorenz Brun0db90ba2020-04-06 14:04:52 +020029 "golang.org/x/sys/unix"
30 "google.golang.org/grpc"
31 "google.golang.org/grpc/codes"
32 "google.golang.org/grpc/status"
Lorenz Brun65702192023-08-31 16:27:38 +020033 "google.golang.org/protobuf/types/known/wrapperspb"
Lorenz Brun6211e4d2023-11-14 19:09:40 +010034 pluginregistration "k8s.io/kubelet/pkg/apis/pluginregistration/v1"
Lorenz Brun0db90ba2020-04-06 14:04:52 +020035
Serge Bazanski3c5d0632024-09-12 10:49:12 +000036 "source.monogon.dev/go/logging"
Serge Bazanski31370b02021-01-07 16:31:14 +010037 "source.monogon.dev/metropolis/node/core/localstorage"
Tim Windelschmidt9f21f532024-05-07 15:14:20 +020038 "source.monogon.dev/osbase/fsquota"
Tim Windelschmidt9f21f532024-05-07 15:14:20 +020039 "source.monogon.dev/osbase/loop"
40 "source.monogon.dev/osbase/supervisor"
Lorenz Brun0db90ba2020-04-06 14:04:52 +020041)
42
// acceptableNames matches the volume names this plugin is willing to handle.
// The pattern is derived from the K8s spec for acceptable names, but limits
// names to 130 characters in total to avoid running into maximum path length
// issues. Longer names are never provisioned by us, so the limit only matters
// for volumes created manually with a name of more than 130 characters.
//
// A name must start with a lowercase letter, end with a lowercase letter or
// digit, and may contain lowercase letters, digits, '-' and '.' in between.
var acceptableNames = regexp.MustCompile(`^[a-z][a-z0-9-.]{0,128}[a-z0-9]$`)
Lorenz Brun0db90ba2020-04-06 14:04:52 +020048
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020049type csiPluginServer struct {
Lorenz Brun37050122021-03-30 14:00:27 +020050 *csi.UnimplementedNodeServer
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020051 KubeletDirectory *localstorage.DataKubernetesKubeletDirectory
52 VolumesDirectory *localstorage.DataVolumesDirectory
Lorenz Brun0db90ba2020-04-06 14:04:52 +020053
Serge Bazanski3c5d0632024-09-12 10:49:12 +000054 logger logging.Leveled
Lorenz Brun0db90ba2020-04-06 14:04:52 +020055}
56
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020057func (s *csiPluginServer) Run(ctx context.Context) error {
58 s.logger = supervisor.Logger(ctx)
Lorenz Brun0db90ba2020-04-06 14:04:52 +020059
Lorenz Brun4599aa22023-06-28 13:09:32 +020060 // Try to remove socket if an unclean shutdown happened.
61 os.Remove(s.KubeletDirectory.Plugins.VFS.FullPath())
62
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020063 pluginListener, err := net.ListenUnix("unix", &net.UnixAddr{Name: s.KubeletDirectory.Plugins.VFS.FullPath(), Net: "unix"})
64 if err != nil {
65 return fmt.Errorf("failed to listen on CSI socket: %w", err)
Lorenz Brun0db90ba2020-04-06 14:04:52 +020066 }
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020067
68 pluginServer := grpc.NewServer()
69 csi.RegisterIdentityServer(pluginServer, s)
70 csi.RegisterNodeServer(pluginServer, s)
Serge Bazanski216fe7b2021-05-21 18:36:16 +020071 // Enable graceful shutdown since we don't have long-running RPCs and most
72 // of them shouldn't and can't be cancelled anyways.
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020073 if err := supervisor.Run(ctx, "csi-node", supervisor.GRPCServer(pluginServer, pluginListener, true)); err != nil {
74 return err
75 }
76
Lorenz Brun1dd0c652024-02-20 18:45:06 +010077 r := pluginRegistrationServer{
78 regErr: make(chan error, 1),
79 KubeletDirectory: s.KubeletDirectory,
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020080 }
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020081
Lorenz Brun1dd0c652024-02-20 18:45:06 +010082 if err := supervisor.Run(ctx, "registration", r.Run); err != nil {
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020083 return err
84 }
85 supervisor.Signal(ctx, supervisor.SignalHealthy)
86 supervisor.Signal(ctx, supervisor.SignalDone)
87 return nil
Lorenz Brun0db90ba2020-04-06 14:04:52 +020088}
89
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020090func (s *csiPluginServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +020091 if !acceptableNames.MatchString(req.VolumeId) {
92 return nil, status.Error(codes.InvalidArgument, "invalid characters in volume id")
93 }
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020094
95 // TODO(q3k): move this logic to localstorage?
96 volumePath := filepath.Join(s.VolumesDirectory.FullPath(), req.VolumeId)
97
Lorenz Brun0db90ba2020-04-06 14:04:52 +020098 switch req.VolumeCapability.AccessMode.Mode {
99 case csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER:
100 case csi.VolumeCapability_AccessMode_SINGLE_NODE_READER_ONLY:
101 default:
102 return nil, status.Error(codes.InvalidArgument, "unsupported access mode")
103 }
104 switch req.VolumeCapability.AccessType.(type) {
105 case *csi.VolumeCapability_Mount:
Jan Schärbe70c922024-11-21 11:16:03 +0100106 if err := os.MkdirAll(req.TargetPath, 0700); err != nil {
107 return nil, status.Errorf(codes.Internal, "unable to create requested target path: %v", err)
108 }
109
Jan Schär73beb692024-11-27 17:47:09 +0100110 err := unix.Mount(volumePath, req.TargetPath, "", unix.MS_BIND, "")
Lorenz Brun37050122021-03-30 14:00:27 +0200111 switch {
Tim Windelschmidtd5f851b2024-04-23 14:59:37 +0200112 case errors.Is(err, unix.ENOENT):
Lorenz Brun37050122021-03-30 14:00:27 +0200113 return nil, status.Error(codes.NotFound, "volume not found")
114 case err != nil:
115 return nil, status.Errorf(codes.Unavailable, "failed to bind-mount volume: %v", err)
116 }
117
Jan Schär73beb692024-11-27 17:47:09 +0100118 var flags uintptr = unix.MS_REMOUNT | unix.MS_BIND
119 if req.Readonly {
120 flags |= unix.MS_RDONLY
Tim Windelschmidta8938da2024-09-13 22:34:01 +0200121 }
Jan Schär73beb692024-11-27 17:47:09 +0100122 if err := unix.Mount("", req.TargetPath, "", flags, ""); err != nil {
Tim Windelschmidta8938da2024-09-13 22:34:01 +0200123 _ = unix.Unmount(req.TargetPath, 0) // Best-effort
Jan Schär73beb692024-11-27 17:47:09 +0100124 return nil, status.Errorf(codes.Internal, "unable to set mount-point flags: %v", err)
Tim Windelschmidta8938da2024-09-13 22:34:01 +0200125 }
Lorenz Brun37050122021-03-30 14:00:27 +0200126 case *csi.VolumeCapability_Block:
127 f, err := os.OpenFile(volumePath, os.O_RDWR, 0)
128 if err != nil {
129 return nil, status.Errorf(codes.Unavailable, "failed to open block volume: %v", err)
130 }
131 defer f.Close()
132 var flags uint32 = loop.FlagDirectIO
133 if req.Readonly {
134 flags |= loop.FlagReadOnly
135 }
136 loopdev, err := loop.Create(f, loop.Config{Flags: flags})
137 if err != nil {
138 return nil, status.Errorf(codes.Unavailable, "failed to create loop device: %v", err)
139 }
140 loopdevNum, err := loopdev.Dev()
141 if err != nil {
142 loopdev.Remove()
143 return nil, status.Errorf(codes.Internal, "device number not available: %v", err)
144 }
145 if err := unix.Mknod(req.TargetPath, unix.S_IFBLK|0640, int(loopdevNum)); err != nil {
146 loopdev.Remove()
147 return nil, status.Errorf(codes.Unavailable, "failed to create device node at target path: %v", err)
148 }
149 loopdev.Close()
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200150 default:
151 return nil, status.Error(codes.InvalidArgument, "unsupported access type")
152 }
153
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200154 return &csi.NodePublishVolumeResponse{}, nil
155}
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200156
Lorenz Brun37050122021-03-30 14:00:27 +0200157func (s *csiPluginServer) NodeUnpublishVolume(ctx context.Context, req *csi.NodeUnpublishVolumeRequest) (*csi.NodeUnpublishVolumeResponse, error) {
158 loopdev, err := loop.Open(req.TargetPath)
159 if err == nil {
160 defer loopdev.Close()
161 // We have a block device
162 if err := loopdev.Remove(); err != nil {
163 return nil, status.Errorf(codes.Unavailable, "failed to remove loop device: %v", err)
164 }
165 if err := os.Remove(req.TargetPath); err != nil && !os.IsNotExist(err) {
166 return nil, status.Errorf(codes.Unavailable, "failed to remove device inode: %v", err)
167 }
168 return &csi.NodeUnpublishVolumeResponse{}, nil
169 }
170 // Otherwise try a normal unmount
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200171 if err := unix.Unmount(req.TargetPath, 0); err != nil {
172 return nil, status.Errorf(codes.Unavailable, "failed to unmount volume: %v", err)
173 }
174 return &csi.NodeUnpublishVolumeResponse{}, nil
175}
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200176
177func (*csiPluginServer) NodeGetVolumeStats(ctx context.Context, req *csi.NodeGetVolumeStatsRequest) (*csi.NodeGetVolumeStatsResponse, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200178 quota, err := fsquota.GetQuota(req.VolumePath)
179 if os.IsNotExist(err) {
180 return nil, status.Error(codes.NotFound, "volume does not exist at this path")
181 } else if err != nil {
182 return nil, status.Errorf(codes.Unavailable, "failed to get quota: %v", err)
183 }
184
185 return &csi.NodeGetVolumeStatsResponse{
186 Usage: []*csi.VolumeUsage{
187 {
188 Total: int64(quota.Bytes),
189 Unit: csi.VolumeUsage_BYTES,
190 Used: int64(quota.BytesUsed),
191 Available: int64(quota.Bytes - quota.BytesUsed),
192 },
193 {
194 Total: int64(quota.Inodes),
195 Unit: csi.VolumeUsage_INODES,
196 Used: int64(quota.InodesUsed),
197 Available: int64(quota.Inodes - quota.InodesUsed),
198 },
199 },
200 }, nil
201}
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200202
Lorenz Brun37050122021-03-30 14:00:27 +0200203func (s *csiPluginServer) NodeExpandVolume(ctx context.Context, req *csi.NodeExpandVolumeRequest) (*csi.NodeExpandVolumeResponse, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200204 if req.CapacityRange.LimitBytes <= 0 {
205 return nil, status.Error(codes.InvalidArgument, "invalid expanded volume size: at or below zero bytes")
206 }
Lorenz Brun37050122021-03-30 14:00:27 +0200207 loopdev, err := loop.Open(req.VolumePath)
208 if err == nil {
209 defer loopdev.Close()
210 volumePath := filepath.Join(s.VolumesDirectory.FullPath(), req.VolumeId)
211 imageFile, err := os.OpenFile(volumePath, os.O_RDWR, 0)
212 if err != nil {
213 return nil, status.Errorf(codes.Unavailable, "failed to open block volume backing file: %v", err)
214 }
215 defer imageFile.Close()
216 if err := unix.Fallocate(int(imageFile.Fd()), 0, 0, req.CapacityRange.LimitBytes); err != nil {
217 return nil, status.Errorf(codes.Unavailable, "failed to expand volume using fallocate: %v", err)
218 }
219 if err := loopdev.RefreshSize(); err != nil {
220 return nil, status.Errorf(codes.Unavailable, "failed to refresh loop device size: %v", err)
221 }
222 return &csi.NodeExpandVolumeResponse{CapacityBytes: req.CapacityRange.LimitBytes}, nil
223 }
Lorenz Brun397f7ea2024-08-20 21:26:06 +0200224 if err := fsquota.SetQuota(req.VolumePath, uint64(req.CapacityRange.LimitBytes), uint64(req.CapacityRange.LimitBytes)/inodeCapacityRatio); err != nil {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200225 return nil, status.Errorf(codes.Unavailable, "failed to update quota: %v", err)
226 }
227 return &csi.NodeExpandVolumeResponse{CapacityBytes: req.CapacityRange.LimitBytes}, nil
228}
229
230func rpcCapability(cap csi.NodeServiceCapability_RPC_Type) *csi.NodeServiceCapability {
231 return &csi.NodeServiceCapability{
232 Type: &csi.NodeServiceCapability_Rpc{
233 Rpc: &csi.NodeServiceCapability_RPC{Type: cap},
234 },
235 }
236}
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200237
238func (*csiPluginServer) NodeGetCapabilities(ctx context.Context, req *csi.NodeGetCapabilitiesRequest) (*csi.NodeGetCapabilitiesResponse, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200239 return &csi.NodeGetCapabilitiesResponse{
240 Capabilities: []*csi.NodeServiceCapability{
241 rpcCapability(csi.NodeServiceCapability_RPC_EXPAND_VOLUME),
242 rpcCapability(csi.NodeServiceCapability_RPC_GET_VOLUME_STATS),
243 },
244 }, nil
245}
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200246
247func (*csiPluginServer) NodeGetInfo(ctx context.Context, req *csi.NodeGetInfoRequest) (*csi.NodeGetInfoResponse, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200248 hostname, err := os.Hostname()
249 if err != nil {
250 return nil, status.Errorf(codes.Unavailable, "failed to get node identity: %v", err)
251 }
252 return &csi.NodeGetInfoResponse{
253 NodeId: hostname,
254 }, nil
255}
256
257// CSI Identity endpoints
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200258func (*csiPluginServer) GetPluginInfo(ctx context.Context, req *csi.GetPluginInfoRequest) (*csi.GetPluginInfoResponse, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200259 return &csi.GetPluginInfoResponse{
Serge Bazanski662b5b32020-12-21 13:49:00 +0100260 Name: "dev.monogon.metropolis.vfs",
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200261 VendorVersion: "0.0.1", // TODO(lorenz): Maybe stamp?
262 }, nil
263}
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200264
265func (*csiPluginServer) GetPluginCapabilities(ctx context.Context, req *csi.GetPluginCapabilitiesRequest) (*csi.GetPluginCapabilitiesResponse, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200266 return &csi.GetPluginCapabilitiesResponse{
267 Capabilities: []*csi.PluginCapability{
268 {
269 Type: &csi.PluginCapability_VolumeExpansion_{
270 VolumeExpansion: &csi.PluginCapability_VolumeExpansion{
271 Type: csi.PluginCapability_VolumeExpansion_ONLINE,
272 },
273 },
274 },
275 },
276 }, nil
277}
278
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200279func (s *csiPluginServer) Probe(ctx context.Context, req *csi.ProbeRequest) (*csi.ProbeResponse, error) {
Lorenz Brun65702192023-08-31 16:27:38 +0200280 return &csi.ProbeResponse{Ready: &wrapperspb.BoolValue{Value: true}}, nil
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200281}
282
Lorenz Brun1dd0c652024-02-20 18:45:06 +0100283// pluginRegistrationServer implements the pluginregistration.Registration
284// service. It has a special restart mechanic to accomodate a design issue
285// in Kubelet which requires it to remove and recreate its gRPC socket for
286// every new registration attempt.
287type pluginRegistrationServer struct {
288 // regErr has a buffer of 1, so that at least one error can always be
289 // sent into it in a non-blocking way. There is a race if
290 // NotifyRegistrationStatus is called twice with an error as the buffered
291 // item might have been received but not fully processed yet.
292 // As distinguishing between calls on different socket iterations is
293 // hard, doing it this way errs on the side of caution, i.e.
294 // generating too many restarts. This way is better as if we miss one
295 // such error the registration will not be available until the node
296 // gets restarted.
297 regErr chan error
298
299 KubeletDirectory *localstorage.DataKubernetesKubeletDirectory
300}
301
302func (r *pluginRegistrationServer) Run(ctx context.Context) error {
303 // Remove registration socket if it exists
304 os.Remove(r.KubeletDirectory.PluginsRegistry.VFSReg.FullPath())
305
306 registrationListener, err := net.ListenUnix("unix", &net.UnixAddr{Name: r.KubeletDirectory.PluginsRegistry.VFSReg.FullPath(), Net: "unix"})
307 if err != nil {
308 return fmt.Errorf("failed to listen on CSI registration socket: %w", err)
309 }
310 defer registrationListener.Close()
311
312 grpcS := grpc.NewServer()
313 pluginregistration.RegisterRegistrationServer(grpcS, r)
314
315 supervisor.Run(ctx, "rpc", supervisor.GRPCServer(grpcS, registrationListener, true))
316 supervisor.Signal(ctx, supervisor.SignalHealthy)
317 select {
318 case <-ctx.Done():
319 return ctx.Err()
320 case err = <-r.regErr:
321 return err
322 }
323}
324
325func (r *pluginRegistrationServer) GetInfo(ctx context.Context, req *pluginregistration.InfoRequest) (*pluginregistration.PluginInfo, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200326 return &pluginregistration.PluginInfo{
Lorenz Brun4e090352021-03-17 17:44:41 +0100327 Type: pluginregistration.CSIPlugin,
Serge Bazanski662b5b32020-12-21 13:49:00 +0100328 Name: "dev.monogon.metropolis.vfs",
Lorenz Brun1dd0c652024-02-20 18:45:06 +0100329 Endpoint: r.KubeletDirectory.Plugins.VFS.FullPath(),
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200330 SupportedVersions: []string{"1.2"}, // Keep in sync with container-storage-interface/spec package version
331 }, nil
332}
333
Lorenz Brun1dd0c652024-02-20 18:45:06 +0100334func (r *pluginRegistrationServer) NotifyRegistrationStatus(ctx context.Context, req *pluginregistration.RegistrationStatus) (*pluginregistration.RegistrationStatusResponse, error) {
335 if !req.PluginRegistered {
336 select {
337 case r.regErr <- fmt.Errorf("registration failed: %v", req.Error):
338 default:
339 }
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200340 }
341 return &pluginregistration.RegistrationStatusResponse{}, nil
342}