metropolis/node/kubernetes: add mountOptions support for PVs
We have very strict defaults on our data mount which prevent execs and
suid binaries. By adding support for mountOptions on PVs we enable
the user to allow specific behaviour, e.g. execs on the given PV.
Change-Id: I902cf3b9dafb14598cddc18c327ef3f5bcd6450b
Reviewed-on: https://review.monogon.dev/c/monogon/+/3421
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
Tested-by: Jenkins CI
diff --git a/metropolis/node/kubernetes/csi.go b/metropolis/node/kubernetes/csi.go
index 3236bba..5ca6885 100644
--- a/metropolis/node/kubernetes/csi.go
+++ b/metropolis/node/kubernetes/csi.go
@@ -106,7 +106,12 @@
}
switch req.VolumeCapability.AccessType.(type) {
case *csi.VolumeCapability_Mount:
- err := unix.Mount(volumePath, req.TargetPath, "", unix.MS_BIND, "")
+ var mountFlags uintptr = unix.MS_BIND
+ if req.Readonly {
+ mountFlags |= unix.MS_RDONLY // NOTE(review): mount(2) says bind-mount creation ignores flags other than MS_REC - confirm MS_RDONLY is honoured here without a separate MS_REMOUNT pass
+ }
+
+ err := unix.Mount(volumePath, req.TargetPath, "", mountFlags, "")
switch {
case errors.Is(err, unix.ENOENT):
return nil, status.Error(codes.NotFound, "volume not found")
@@ -114,13 +119,49 @@
return nil, status.Errorf(codes.Unavailable, "failed to bind-mount volume: %v", err)
}
- if req.Readonly {
- err := unix.Mount(volumePath, req.TargetPath, "", unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY, "")
- if err != nil {
- _ = unix.Unmount(req.TargetPath, 0) // Best-effort
- return nil, status.Errorf(codes.Unavailable, "failed to remount volume: %v", err)
+ flagSet := make(map[string]bool)
+ for _, flag := range req.VolumeCapability.GetMount().GetMountFlags() {
+ flagSet[flag] = true
+ }
+
+ flagPairs := map[string]string{
+ "exec": "noexec",
+ "dev": "nodev",
+ "suid": "nosuid",
+ }
+ for pFlag, nFlag := range flagPairs {
+ if flagSet[pFlag] && flagSet[nFlag] {
+ return nil, status.Errorf(codes.InvalidArgument, "contradictory flag pair found. can't have both %q and %q set", pFlag, nFlag)
+ } else if !flagSet[pFlag] && !flagSet[nFlag] {
+ // If neither of a flag pair is found, add the negative flag as default.
+ flagSet[nFlag] = true
}
}
+
+ var mountAttr unix.MountAttr // Attr_set and Attr_clr are bitmasks of MOUNT_ATTR_* values.
+ for flag := range flagSet {
+ switch flag { // Accumulate with |=: flagSet holds one flag per pair, so plain assignment would keep only the last one visited.
+ case "exec":
+ mountAttr.Attr_clr |= unix.MOUNT_ATTR_NOEXEC
+ case "noexec":
+ mountAttr.Attr_set |= unix.MOUNT_ATTR_NOEXEC
+ case "dev":
+ mountAttr.Attr_clr |= unix.MOUNT_ATTR_NODEV
+ case "nodev":
+ mountAttr.Attr_set |= unix.MOUNT_ATTR_NODEV
+ case "suid":
+ mountAttr.Attr_clr |= unix.MOUNT_ATTR_NOSUID
+ case "nosuid":
+ mountAttr.Attr_set |= unix.MOUNT_ATTR_NOSUID
+ default:
+ return nil, status.Errorf(codes.InvalidArgument, "unknown mount flag: %s", flag)
+ }
+ }
+
+ if err := unix.MountSetattr(-1, req.TargetPath, 0, &mountAttr); err != nil {
+ _ = unix.Unmount(req.TargetPath, 0) // Best-effort
+ return nil, status.Errorf(codes.Internal, "unable to set mount attributes: %v", err)
+ }
case *csi.VolumeCapability_Block:
f, err := os.OpenFile(volumePath, os.O_RDWR, 0)
if err != nil {
diff --git a/metropolis/node/kubernetes/reconciler/resources_storageclass.go b/metropolis/node/kubernetes/reconciler/resources_storageclass.go
index b242bbf..36dee1c 100644
--- a/metropolis/node/kubernetes/reconciler/resources_storageclass.go
+++ b/metropolis/node/kubernetes/reconciler/resources_storageclass.go
@@ -66,12 +66,41 @@
Labels: builtinLabels(nil),
Annotations: map[string]string{
"storageclass.kubernetes.io/is-default-class": "true",
+ "kubernetes.io/description": "local is the default storage class on Metropolis. " +
+ "It stores data on the node root disk and supports space limits, resizing and oversubscription but no snapshots. " +
+ "It is backed by XFS and uses permissive mounting options (exec,dev,suid). " +
+ "If you want more strict mounting options, choose the `local-strict` storage class.",
},
},
AllowVolumeExpansion: True(),
Provisioner: csiProvisionerName,
ReclaimPolicy: &reclaimPolicyDelete,
VolumeBindingMode: &waitForConsumerBinding,
+ MountOptions: []string{
+ "exec",
+ "dev",
+ "suid",
+ },
+ },
+ &storage.StorageClass{
+ ObjectMeta: meta.ObjectMeta{
+ Name: "local-strict",
+ Labels: builtinLabels(nil),
+ Annotations: map[string]string{
+ "storageclass.kubernetes.io/is-default-class": "false",
+ "kubernetes.io/description": "local-strict is the same as local (see its description) but uses strict mount options (noexec, nodev, nosuid). " +
+ "It is best used together with readOnlyRoot to restrict exploitation vectors.",
+ },
+ },
+ AllowVolumeExpansion: True(),
+ Provisioner: csiProvisionerName,
+ ReclaimPolicy: &reclaimPolicyDelete,
+ VolumeBindingMode: &waitForConsumerBinding,
+ MountOptions: []string{
+ "noexec",
+ "nodev",
+ "nosuid",
+ },
},
}
}