node/core: add sysctls
Change-Id: I47b0d639a62f73f134430c5164a35eef2b5622d7
Reviewed-on: https://review.monogon.dev/c/monogon/+/2273
Tested-by: Jenkins CI
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/metropolis/node/core/BUILD.bazel b/metropolis/node/core/BUILD.bazel
index e8367f9..9043d8e 100644
--- a/metropolis/node/core/BUILD.bazel
+++ b/metropolis/node/core/BUILD.bazel
@@ -9,6 +9,7 @@
"nodeparams.go",
"panichandler.go",
"pstore.go",
+ "sysctl.go",
] + select({
"//metropolis/node:debug_build": [
"debug_service_enabled.go",
@@ -36,6 +37,7 @@
"//metropolis/pkg/logtree",
"//metropolis/pkg/pstore",
"//metropolis/pkg/supervisor",
+ "//metropolis/pkg/sysctl",
"//metropolis/pkg/tpm",
"//metropolis/proto/api",
"@com_github_cenkalti_backoff_v4//:backoff",
diff --git a/metropolis/node/core/main.go b/metropolis/node/core/main.go
index 3eb9024..583d72f 100644
--- a/metropolis/node/core/main.go
+++ b/metropolis/node/core/main.go
@@ -184,6 +184,9 @@
if err := supervisor.Run(ctx, "pstore", dumpAndCleanPstore); err != nil {
return fmt.Errorf("when starting pstore: %w", err)
}
+ if err := supervisor.Run(ctx, "sysctl", nodeSysctls); err != nil {
+ return fmt.Errorf("when applying sysctls: %w", err)
+ }
// The kernel does of course not run in this runnable, only the log pipe
// runs in it.
diff --git a/metropolis/node/core/network/BUILD.bazel b/metropolis/node/core/network/BUILD.bazel
index 1500eff..4f5a148 100644
--- a/metropolis/node/core/network/BUILD.bazel
+++ b/metropolis/node/core/network/BUILD.bazel
@@ -17,6 +17,7 @@
"//metropolis/pkg/event/memory",
"//metropolis/pkg/logtree",
"//metropolis/pkg/supervisor",
+ "//metropolis/pkg/sysctl",
"//net/proto",
"@com_github_google_nftables//:nftables",
"@com_github_google_nftables//expr",
diff --git a/metropolis/node/core/network/main.go b/metropolis/node/core/network/main.go
index 3cfe0f5..6532404 100644
--- a/metropolis/node/core/network/main.go
+++ b/metropolis/node/core/network/main.go
@@ -20,10 +20,7 @@
"context"
"fmt"
"net"
- "os"
- "path"
"strconv"
- "strings"
"github.com/google/nftables"
"github.com/google/nftables/expr"
@@ -36,6 +33,8 @@
"source.monogon.dev/metropolis/pkg/event"
"source.monogon.dev/metropolis/pkg/event/memory"
"source.monogon.dev/metropolis/pkg/supervisor"
+ "source.monogon.dev/metropolis/pkg/sysctl"
+
netpb "source.monogon.dev/net/proto"
)
@@ -194,27 +193,6 @@
return nil
}
-// sysctlOptions contains sysctl options to apply
-type sysctlOptions map[string]string
-
-// apply attempts to apply all options in sysctlOptions. It aborts on the first
-// one which returns an error when applying.
-func (o sysctlOptions) apply() error {
- for name, value := range o {
- filePath := path.Join("/proc/sys/", strings.ReplaceAll(name, ".", "/"))
- optionFile, err := os.OpenFile(filePath, os.O_WRONLY, 0)
- if err != nil {
- return fmt.Errorf("failed to set option %v: %w", name, err)
- }
- if _, err := optionFile.WriteString(value + "\n"); err != nil {
- optionFile.Close()
- return fmt.Errorf("failed to set option %v: %w", name, err)
- }
- optionFile.Close() // In a loop, defer'ing could open a lot of FDs
- }
- return nil
-}
-
// RFC2474 Section 4.2.2.1 with reference to RFC791 Section 3.1 (Network
// Control Precedence)
const dscpCS7 = 0x7 << 3
@@ -224,7 +202,7 @@
s.dnsSvc.ExtraListenerIPs = s.ExtraDNSListenerIPs
supervisor.Run(ctx, "dns", s.dnsSvc.Run)
- earlySysctlOpts := sysctlOptions{
+ earlySysctlOpts := sysctl.Options{
// Enable strict reverse path filtering on all interfaces (important
// for spoofing prevention from Pods with CAP_NET_ADMIN)
"net.ipv4.conf.all.rp_filter": "1",
@@ -239,7 +217,7 @@
// Make neighbor discovery use DSCP CS7 without ECN
"net.ipv6.conf.all.ndisc_tclass": strconv.Itoa(dscpCS7 << 2),
}
- if err := earlySysctlOpts.apply(); err != nil {
+ if err := earlySysctlOpts.Apply(); err != nil {
logger.Fatalf("Error configuring early sysctl options: %v", err)
}
// Choose between autoconfig and static config runnables
@@ -308,7 +286,7 @@
logger.Fatalf("Failed to set up nftables nat chain: %v", err)
}
- sysctlOpts := sysctlOptions{
+ sysctlOpts := sysctl.Options{
// Enable IP forwarding for our pods
"net.ipv4.ip_forward": "1",
@@ -319,7 +297,7 @@
"net.ipv4.tcp_rmem": "4096 87380 16777216",
"net.ipv4.tcp_wmem": "4096 87380 16777216",
}
- if err := sysctlOpts.apply(); err != nil {
+ if err := sysctlOpts.Apply(); err != nil {
logger.Fatalf("Failed to set up kernel network config: %v", err)
}
diff --git a/metropolis/node/core/network/static.go b/metropolis/node/core/network/static.go
index ffecf7d..1752810 100644
--- a/metropolis/node/core/network/static.go
+++ b/metropolis/node/core/network/static.go
@@ -21,6 +21,8 @@
"source.monogon.dev/metropolis/node/core/network/dns"
"source.monogon.dev/metropolis/pkg/logtree"
"source.monogon.dev/metropolis/pkg/supervisor"
+ "source.monogon.dev/metropolis/pkg/sysctl"
+
netpb "source.monogon.dev/net/proto"
)
@@ -123,9 +125,9 @@
hasIPv4Autoconfig = true
}
if i.Ipv6Autoconfig != nil {
- err := sysctlOptions{
+ err := sysctl.Options{
"net.ipv6.conf." + newLink.Attrs().Name + ".accept_ra": "1",
- }.apply()
+ }.Apply()
if err != nil {
return fmt.Errorf("failed enabling accept_ra for interface %q: %w", newLink.Attrs().Name, err)
}
diff --git a/metropolis/node/core/sysctl.go b/metropolis/node/core/sysctl.go
new file mode 100644
index 0000000..eb72aa3
--- /dev/null
+++ b/metropolis/node/core/sysctl.go
@@ -0,0 +1,26 @@
+package main
+
+import (
+ "context"
+ "strconv"
+
+ "source.monogon.dev/metropolis/pkg/supervisor"
+ "source.monogon.dev/metropolis/pkg/sysctl"
+)
+
+func nodeSysctls(ctx context.Context) error {
+ const vmMaxMapCount = 2<<30 - 1
+ options := sysctl.Options{
+ // We increase the max mmap count to nearly the maximum, as it gets
+ // accounted for by the cgroup memory limit.
+ "vm.max_map_count": strconv.Itoa(vmMaxMapCount),
+ }
+
+ if err := options.Apply(); err != nil {
+ return err
+ }
+
+ supervisor.Signal(ctx, supervisor.SignalHealthy)
+ supervisor.Signal(ctx, supervisor.SignalDone)
+ return nil
+}
diff --git a/metropolis/pkg/sysctl/BUILD.bazel b/metropolis/pkg/sysctl/BUILD.bazel
new file mode 100644
index 0000000..a945a03
--- /dev/null
+++ b/metropolis/pkg/sysctl/BUILD.bazel
@@ -0,0 +1,8 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+
+go_library(
+ name = "sysctl",
+ srcs = ["options.go"],
+ importpath = "source.monogon.dev/metropolis/pkg/sysctl",
+ visibility = ["//visibility:public"],
+)
diff --git a/metropolis/pkg/sysctl/options.go b/metropolis/pkg/sysctl/options.go
new file mode 100644
index 0000000..b5e1e36
--- /dev/null
+++ b/metropolis/pkg/sysctl/options.go
@@ -0,0 +1,29 @@
+package sysctl
+
+import (
+ "fmt"
+ "os"
+ "path"
+ "strings"
+)
+
+// Options maps dotted sysctl names (e.g. "net.ipv4.ip_forward") to values.
+type Options map[string]string
+
+// Apply writes every option to its file under /proc/sys, in no particular
+// order. It aborts on the first option which fails to be applied.
+func (o Options) Apply() error {
+ for name, value := range o {
+ filePath := path.Join("/proc/sys/", strings.ReplaceAll(name, ".", "/"))
+ optionFile, err := os.OpenFile(filePath, os.O_WRONLY, 0)
+ if err != nil {
+ return fmt.Errorf("failed to set option %v: %w", name, err)
+ }
+ if _, err := optionFile.WriteString(value + "\n"); err != nil {
+ optionFile.Close()
+ return fmt.Errorf("failed to set option %v: %w", name, err)
+ }
+ optionFile.Close() // In a loop, defer'ing could open a lot of FDs
+ }
+ return nil
+}