Set reasonable defaults for our kernel's network configuration

This sets a number of sysctl options to tune the kernel for a datacenter-like environment by increasing
buffers and choosing a better congestion control algorithm. It also enforces reverse path filtering to
prevent spoofing from CAP_NET_ADMIN-enabled containers and blocks source routing as we have no need for that
and it might some day interfere with policy efforts.

To set all these options a small helper structure has been added which makes setting these more compact
and nicer to read.

Test Plan: Covered by E2E for breakage, scalability improvements not yet testable

Bug: T495

X-Origin-Diff: phab/D704
GitOrigin-RevId: 427b2513d604090e51b37587d772f240112be09d
diff --git a/metropolis/node/core/network/main.go b/metropolis/node/core/network/main.go
index 25afe8e..c23b85c 100644
--- a/metropolis/node/core/network/main.go
+++ b/metropolis/node/core/network/main.go
@@ -20,8 +20,10 @@
 	"context"
 	"errors"
 	"fmt"
-	"io/ioutil"
 	"net"
+	"os"
+	"path"
+	"strings"
 	"sync"
 	"time"
 
@@ -157,6 +159,26 @@
 	}
 }
 
+// sysctlOptions contains sysctl options to apply
+type sysctlOptions map[string]string
+
+// apply attempts to apply all options in sysctlOptions. It aborts on the first one which returns an error when applying.
+func (o sysctlOptions) apply() error {
+	for name, value := range o {
+		filePath := path.Join("/proc/sys/", strings.ReplaceAll(name, ".", "/"))
+		optionFile, err := os.OpenFile(filePath, os.O_WRONLY, 0)
+		if err != nil {
+			return fmt.Errorf("failed to set option %v: %w", name, err)
+		}
+		if _, err := optionFile.WriteString(value + "\n"); err != nil {
+			optionFile.Close()
+			return fmt.Errorf("failed to set option %v: %w", name, err)
+		}
+		optionFile.Close() // In a loop, defer'ing could open a lot of FDs
+	}
+	return nil
+}
+
 func (s *Service) Run(ctx context.Context) error {
 	logger := supervisor.Logger(ctx)
 	dnsSvc := dns.New(s.config.CorednsRegistrationChan)
@@ -179,8 +201,25 @@
 		logger.Fatalf("Failed to set up nftables base chains: %v", err)
 	}
 
-	if err := ioutil.WriteFile("/proc/sys/net/ipv4/ip_forward", []byte("1\n"), 0644); err != nil {
-		logger.Fatalf("Failed to enable IPv4 forwarding: %v", err)
+	sysctlOpts := sysctlOptions{
+		// Enable IP forwarding for our pods
+		"net.ipv4.ip_forward": "1",
+		// Enable strict reverse path filtering on all interfaces (important for spoofing prevention from Pods with CAP_NET_ADMIN)
+		"net.ipv4.conf.all.rp_filter": "1",
+		// Disable source routing
+		"net.ipv4.conf.all.accept_source_route": "0",
+
+		// Set congestion control to Google BBR
+		"net.ipv4.tcp_congestion_control": "bbr",
+
+		// Increase Linux socket kernel buffer sizes to 16MiB (needed for fast datacenter networks)
+		"net.core.rmem_max": "16777216",
+		"net.core.wmem_max": "16777216",
+		"net.ipv4.tcp_rmem": "4096 87380 16777216",
+		"net.ipv4.tcp_wmem": "4096 87380 16777216",
+	}
+	if err := sysctlOpts.apply(); err != nil {
+		logger.Fatalf("Failed to set up kernel network config: %v", err)
 	}
 
 	supervisor.Signal(ctx, supervisor.SignalHealthy)