blob: c0d81bba306fce8f08104f941862c5fbdbb0fe66 [file] [log] [blame]
// Copyright 2020 The Monogon Project Authors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
16
17package kubernetes
18
19import (
20 "context"
Tim Windelschmidtd5f851b2024-04-23 14:59:37 +020021 "errors"
Lorenz Brun0db90ba2020-04-06 14:04:52 +020022 "fmt"
23 "net"
24 "os"
25 "path/filepath"
26 "regexp"
27
Lorenz Brun0db90ba2020-04-06 14:04:52 +020028 "github.com/container-storage-interface/spec/lib/go/csi"
Lorenz Brun0db90ba2020-04-06 14:04:52 +020029 "golang.org/x/sys/unix"
30 "google.golang.org/grpc"
31 "google.golang.org/grpc/codes"
32 "google.golang.org/grpc/status"
Lorenz Brun65702192023-08-31 16:27:38 +020033 "google.golang.org/protobuf/types/known/wrapperspb"
Lorenz Brun6211e4d2023-11-14 19:09:40 +010034 pluginregistration "k8s.io/kubelet/pkg/apis/pluginregistration/v1"
Lorenz Brun0db90ba2020-04-06 14:04:52 +020035
Serge Bazanski31370b02021-01-07 16:31:14 +010036 "source.monogon.dev/metropolis/node/core/localstorage"
Serge Bazanski31370b02021-01-07 16:31:14 +010037 "source.monogon.dev/metropolis/pkg/fsquota"
Lorenz Brun4e090352021-03-17 17:44:41 +010038 "source.monogon.dev/metropolis/pkg/logtree"
Lorenz Brun37050122021-03-30 14:00:27 +020039 "source.monogon.dev/metropolis/pkg/loop"
Serge Bazanski31370b02021-01-07 16:31:14 +010040 "source.monogon.dev/metropolis/pkg/supervisor"
Lorenz Brun0db90ba2020-04-06 14:04:52 +020041)
42
// acceptableNames matches the volume names this plugin is willing to handle.
// The pattern is derived from the K8s spec for acceptable names, but capped
// at 130 characters to avoid issues with maximum path length. We don't
// provision longer names ourselves, so the cap only matters for volumes that
// were created manually with a name of more than 130 characters.
var acceptableNames = regexp.MustCompile(`^[a-z][a-z0-9-.]{0,128}[a-z0-9]$`)
Lorenz Brun0db90ba2020-04-06 14:04:52 +020048
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020049type csiPluginServer struct {
Lorenz Brun37050122021-03-30 14:00:27 +020050 *csi.UnimplementedNodeServer
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020051 KubeletDirectory *localstorage.DataKubernetesKubeletDirectory
52 VolumesDirectory *localstorage.DataVolumesDirectory
Lorenz Brun0db90ba2020-04-06 14:04:52 +020053
Serge Bazanskic7359672020-10-30 16:38:57 +010054 logger logtree.LeveledLogger
Lorenz Brun0db90ba2020-04-06 14:04:52 +020055}
56
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020057func (s *csiPluginServer) Run(ctx context.Context) error {
58 s.logger = supervisor.Logger(ctx)
Lorenz Brun0db90ba2020-04-06 14:04:52 +020059
Lorenz Brun4599aa22023-06-28 13:09:32 +020060 // Try to remove socket if an unclean shutdown happened.
61 os.Remove(s.KubeletDirectory.Plugins.VFS.FullPath())
62
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020063 pluginListener, err := net.ListenUnix("unix", &net.UnixAddr{Name: s.KubeletDirectory.Plugins.VFS.FullPath(), Net: "unix"})
64 if err != nil {
65 return fmt.Errorf("failed to listen on CSI socket: %w", err)
Lorenz Brun0db90ba2020-04-06 14:04:52 +020066 }
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020067
68 pluginServer := grpc.NewServer()
69 csi.RegisterIdentityServer(pluginServer, s)
70 csi.RegisterNodeServer(pluginServer, s)
Serge Bazanski216fe7b2021-05-21 18:36:16 +020071 // Enable graceful shutdown since we don't have long-running RPCs and most
72 // of them shouldn't and can't be cancelled anyways.
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020073 if err := supervisor.Run(ctx, "csi-node", supervisor.GRPCServer(pluginServer, pluginListener, true)); err != nil {
74 return err
75 }
76
Lorenz Brun1dd0c652024-02-20 18:45:06 +010077 r := pluginRegistrationServer{
78 regErr: make(chan error, 1),
79 KubeletDirectory: s.KubeletDirectory,
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020080 }
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020081
Lorenz Brun1dd0c652024-02-20 18:45:06 +010082 if err := supervisor.Run(ctx, "registration", r.Run); err != nil {
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020083 return err
84 }
85 supervisor.Signal(ctx, supervisor.SignalHealthy)
86 supervisor.Signal(ctx, supervisor.SignalDone)
87 return nil
Lorenz Brun0db90ba2020-04-06 14:04:52 +020088}
89
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020090func (s *csiPluginServer) NodePublishVolume(ctx context.Context, req *csi.NodePublishVolumeRequest) (*csi.NodePublishVolumeResponse, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +020091 if !acceptableNames.MatchString(req.VolumeId) {
92 return nil, status.Error(codes.InvalidArgument, "invalid characters in volume id")
93 }
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020094
95 // TODO(q3k): move this logic to localstorage?
96 volumePath := filepath.Join(s.VolumesDirectory.FullPath(), req.VolumeId)
97
Lorenz Brun0db90ba2020-04-06 14:04:52 +020098 switch req.VolumeCapability.AccessMode.Mode {
99 case csi.VolumeCapability_AccessMode_SINGLE_NODE_WRITER:
100 case csi.VolumeCapability_AccessMode_SINGLE_NODE_READER_ONLY:
101 default:
102 return nil, status.Error(codes.InvalidArgument, "unsupported access mode")
103 }
Lorenz Brund1c392a2023-07-06 19:10:56 +0200104 if err := os.MkdirAll(req.TargetPath, 0700); err != nil {
105 return nil, status.Errorf(codes.Internal, "unable to create requested target path: %v", err)
106 }
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200107 switch req.VolumeCapability.AccessType.(type) {
108 case *csi.VolumeCapability_Mount:
Lorenz Brun37050122021-03-30 14:00:27 +0200109 err := unix.Mount(volumePath, req.TargetPath, "", unix.MS_BIND, "")
110 switch {
Tim Windelschmidtd5f851b2024-04-23 14:59:37 +0200111 case errors.Is(err, unix.ENOENT):
Lorenz Brun37050122021-03-30 14:00:27 +0200112 return nil, status.Error(codes.NotFound, "volume not found")
113 case err != nil:
114 return nil, status.Errorf(codes.Unavailable, "failed to bind-mount volume: %v", err)
115 }
116
117 if req.Readonly {
118 err := unix.Mount(volumePath, req.TargetPath, "", unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY, "")
119 if err != nil {
120 _ = unix.Unmount(req.TargetPath, 0) // Best-effort
121 return nil, status.Errorf(codes.Unavailable, "failed to remount volume: %v", err)
122 }
123 }
124 case *csi.VolumeCapability_Block:
125 f, err := os.OpenFile(volumePath, os.O_RDWR, 0)
126 if err != nil {
127 return nil, status.Errorf(codes.Unavailable, "failed to open block volume: %v", err)
128 }
129 defer f.Close()
130 var flags uint32 = loop.FlagDirectIO
131 if req.Readonly {
132 flags |= loop.FlagReadOnly
133 }
134 loopdev, err := loop.Create(f, loop.Config{Flags: flags})
135 if err != nil {
136 return nil, status.Errorf(codes.Unavailable, "failed to create loop device: %v", err)
137 }
138 loopdevNum, err := loopdev.Dev()
139 if err != nil {
140 loopdev.Remove()
141 return nil, status.Errorf(codes.Internal, "device number not available: %v", err)
142 }
143 if err := unix.Mknod(req.TargetPath, unix.S_IFBLK|0640, int(loopdevNum)); err != nil {
144 loopdev.Remove()
145 return nil, status.Errorf(codes.Unavailable, "failed to create device node at target path: %v", err)
146 }
147 loopdev.Close()
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200148 default:
149 return nil, status.Error(codes.InvalidArgument, "unsupported access type")
150 }
151
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200152 return &csi.NodePublishVolumeResponse{}, nil
153}
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200154
Lorenz Brun37050122021-03-30 14:00:27 +0200155func (s *csiPluginServer) NodeUnpublishVolume(ctx context.Context, req *csi.NodeUnpublishVolumeRequest) (*csi.NodeUnpublishVolumeResponse, error) {
156 loopdev, err := loop.Open(req.TargetPath)
157 if err == nil {
158 defer loopdev.Close()
159 // We have a block device
160 if err := loopdev.Remove(); err != nil {
161 return nil, status.Errorf(codes.Unavailable, "failed to remove loop device: %v", err)
162 }
163 if err := os.Remove(req.TargetPath); err != nil && !os.IsNotExist(err) {
164 return nil, status.Errorf(codes.Unavailable, "failed to remove device inode: %v", err)
165 }
166 return &csi.NodeUnpublishVolumeResponse{}, nil
167 }
168 // Otherwise try a normal unmount
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200169 if err := unix.Unmount(req.TargetPath, 0); err != nil {
170 return nil, status.Errorf(codes.Unavailable, "failed to unmount volume: %v", err)
171 }
172 return &csi.NodeUnpublishVolumeResponse{}, nil
173}
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200174
175func (*csiPluginServer) NodeGetVolumeStats(ctx context.Context, req *csi.NodeGetVolumeStatsRequest) (*csi.NodeGetVolumeStatsResponse, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200176 quota, err := fsquota.GetQuota(req.VolumePath)
177 if os.IsNotExist(err) {
178 return nil, status.Error(codes.NotFound, "volume does not exist at this path")
179 } else if err != nil {
180 return nil, status.Errorf(codes.Unavailable, "failed to get quota: %v", err)
181 }
182
183 return &csi.NodeGetVolumeStatsResponse{
184 Usage: []*csi.VolumeUsage{
185 {
186 Total: int64(quota.Bytes),
187 Unit: csi.VolumeUsage_BYTES,
188 Used: int64(quota.BytesUsed),
189 Available: int64(quota.Bytes - quota.BytesUsed),
190 },
191 {
192 Total: int64(quota.Inodes),
193 Unit: csi.VolumeUsage_INODES,
194 Used: int64(quota.InodesUsed),
195 Available: int64(quota.Inodes - quota.InodesUsed),
196 },
197 },
198 }, nil
199}
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200200
Lorenz Brun37050122021-03-30 14:00:27 +0200201func (s *csiPluginServer) NodeExpandVolume(ctx context.Context, req *csi.NodeExpandVolumeRequest) (*csi.NodeExpandVolumeResponse, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200202 if req.CapacityRange.LimitBytes <= 0 {
203 return nil, status.Error(codes.InvalidArgument, "invalid expanded volume size: at or below zero bytes")
204 }
Lorenz Brun37050122021-03-30 14:00:27 +0200205 loopdev, err := loop.Open(req.VolumePath)
206 if err == nil {
207 defer loopdev.Close()
208 volumePath := filepath.Join(s.VolumesDirectory.FullPath(), req.VolumeId)
209 imageFile, err := os.OpenFile(volumePath, os.O_RDWR, 0)
210 if err != nil {
211 return nil, status.Errorf(codes.Unavailable, "failed to open block volume backing file: %v", err)
212 }
213 defer imageFile.Close()
214 if err := unix.Fallocate(int(imageFile.Fd()), 0, 0, req.CapacityRange.LimitBytes); err != nil {
215 return nil, status.Errorf(codes.Unavailable, "failed to expand volume using fallocate: %v", err)
216 }
217 if err := loopdev.RefreshSize(); err != nil {
218 return nil, status.Errorf(codes.Unavailable, "failed to refresh loop device size: %v", err)
219 }
220 return &csi.NodeExpandVolumeResponse{CapacityBytes: req.CapacityRange.LimitBytes}, nil
221 }
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200222 if err := fsquota.SetQuota(req.VolumePath, uint64(req.CapacityRange.LimitBytes), 0); err != nil {
223 return nil, status.Errorf(codes.Unavailable, "failed to update quota: %v", err)
224 }
225 return &csi.NodeExpandVolumeResponse{CapacityBytes: req.CapacityRange.LimitBytes}, nil
226}
227
228func rpcCapability(cap csi.NodeServiceCapability_RPC_Type) *csi.NodeServiceCapability {
229 return &csi.NodeServiceCapability{
230 Type: &csi.NodeServiceCapability_Rpc{
231 Rpc: &csi.NodeServiceCapability_RPC{Type: cap},
232 },
233 }
234}
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200235
236func (*csiPluginServer) NodeGetCapabilities(ctx context.Context, req *csi.NodeGetCapabilitiesRequest) (*csi.NodeGetCapabilitiesResponse, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200237 return &csi.NodeGetCapabilitiesResponse{
238 Capabilities: []*csi.NodeServiceCapability{
239 rpcCapability(csi.NodeServiceCapability_RPC_EXPAND_VOLUME),
240 rpcCapability(csi.NodeServiceCapability_RPC_GET_VOLUME_STATS),
241 },
242 }, nil
243}
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200244
245func (*csiPluginServer) NodeGetInfo(ctx context.Context, req *csi.NodeGetInfoRequest) (*csi.NodeGetInfoResponse, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200246 hostname, err := os.Hostname()
247 if err != nil {
248 return nil, status.Errorf(codes.Unavailable, "failed to get node identity: %v", err)
249 }
250 return &csi.NodeGetInfoResponse{
251 NodeId: hostname,
252 }, nil
253}
254
255// CSI Identity endpoints
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200256func (*csiPluginServer) GetPluginInfo(ctx context.Context, req *csi.GetPluginInfoRequest) (*csi.GetPluginInfoResponse, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200257 return &csi.GetPluginInfoResponse{
Serge Bazanski662b5b32020-12-21 13:49:00 +0100258 Name: "dev.monogon.metropolis.vfs",
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200259 VendorVersion: "0.0.1", // TODO(lorenz): Maybe stamp?
260 }, nil
261}
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200262
263func (*csiPluginServer) GetPluginCapabilities(ctx context.Context, req *csi.GetPluginCapabilitiesRequest) (*csi.GetPluginCapabilitiesResponse, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200264 return &csi.GetPluginCapabilitiesResponse{
265 Capabilities: []*csi.PluginCapability{
266 {
267 Type: &csi.PluginCapability_VolumeExpansion_{
268 VolumeExpansion: &csi.PluginCapability_VolumeExpansion{
269 Type: csi.PluginCapability_VolumeExpansion_ONLINE,
270 },
271 },
272 },
273 },
274 }, nil
275}
276
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200277func (s *csiPluginServer) Probe(ctx context.Context, req *csi.ProbeRequest) (*csi.ProbeResponse, error) {
Lorenz Brun65702192023-08-31 16:27:38 +0200278 return &csi.ProbeResponse{Ready: &wrapperspb.BoolValue{Value: true}}, nil
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200279}
280
Lorenz Brun1dd0c652024-02-20 18:45:06 +0100281// pluginRegistrationServer implements the pluginregistration.Registration
282// service. It has a special restart mechanic to accomodate a design issue
283// in Kubelet which requires it to remove and recreate its gRPC socket for
284// every new registration attempt.
285type pluginRegistrationServer struct {
286 // regErr has a buffer of 1, so that at least one error can always be
287 // sent into it in a non-blocking way. There is a race if
288 // NotifyRegistrationStatus is called twice with an error as the buffered
289 // item might have been received but not fully processed yet.
290 // As distinguishing between calls on different socket iterations is
291 // hard, doing it this way errs on the side of caution, i.e.
292 // generating too many restarts. This way is better as if we miss one
293 // such error the registration will not be available until the node
294 // gets restarted.
295 regErr chan error
296
297 KubeletDirectory *localstorage.DataKubernetesKubeletDirectory
298}
299
300func (r *pluginRegistrationServer) Run(ctx context.Context) error {
301 // Remove registration socket if it exists
302 os.Remove(r.KubeletDirectory.PluginsRegistry.VFSReg.FullPath())
303
304 registrationListener, err := net.ListenUnix("unix", &net.UnixAddr{Name: r.KubeletDirectory.PluginsRegistry.VFSReg.FullPath(), Net: "unix"})
305 if err != nil {
306 return fmt.Errorf("failed to listen on CSI registration socket: %w", err)
307 }
308 defer registrationListener.Close()
309
310 grpcS := grpc.NewServer()
311 pluginregistration.RegisterRegistrationServer(grpcS, r)
312
313 supervisor.Run(ctx, "rpc", supervisor.GRPCServer(grpcS, registrationListener, true))
314 supervisor.Signal(ctx, supervisor.SignalHealthy)
315 select {
316 case <-ctx.Done():
317 return ctx.Err()
318 case err = <-r.regErr:
319 return err
320 }
321}
322
323func (r *pluginRegistrationServer) GetInfo(ctx context.Context, req *pluginregistration.InfoRequest) (*pluginregistration.PluginInfo, error) {
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200324 return &pluginregistration.PluginInfo{
Lorenz Brun4e090352021-03-17 17:44:41 +0100325 Type: pluginregistration.CSIPlugin,
Serge Bazanski662b5b32020-12-21 13:49:00 +0100326 Name: "dev.monogon.metropolis.vfs",
Lorenz Brun1dd0c652024-02-20 18:45:06 +0100327 Endpoint: r.KubeletDirectory.Plugins.VFS.FullPath(),
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200328 SupportedVersions: []string{"1.2"}, // Keep in sync with container-storage-interface/spec package version
329 }, nil
330}
331
Lorenz Brun1dd0c652024-02-20 18:45:06 +0100332func (r *pluginRegistrationServer) NotifyRegistrationStatus(ctx context.Context, req *pluginregistration.RegistrationStatus) (*pluginregistration.RegistrationStatusResponse, error) {
333 if !req.PluginRegistered {
334 select {
335 case r.regErr <- fmt.Errorf("registration failed: %v", req.Error):
336 default:
337 }
Lorenz Brun0db90ba2020-04-06 14:04:52 +0200338 }
339 return &pluginregistration.RegistrationStatusResponse{}, nil
340}