m/n/c/network: add quirks infra and i40e quirk

This adds the applyQuirks function which is called during early
initialization of the network stack, before any network interfaces are
enabled. This function applies device and/or driver-specific fixups to
make them work better. For that purpose it examines relevant metadata
(driver in use, firmware version and OpROM version) to decide which
quirks should be applied to which device.

As we do not yet have another way of exposing firmware versions, this
also takes care of logging all non-zero firmware versions.

It also adds a first quirk for i40e which disables firmware-based LLDP
processing.

Change-Id: I456753880102dfb5b3e94847cf5627a003d70eeb
Reviewed-on: https://review.monogon.dev/c/monogon/+/2344
Reviewed-by: Tim Windelschmidt <tim@monogon.tech>
Tested-by: Jenkins CI
diff --git a/metropolis/node/core/network/BUILD.bazel b/metropolis/node/core/network/BUILD.bazel
index 4f5a148..a9a6309 100644
--- a/metropolis/node/core/network/BUILD.bazel
+++ b/metropolis/node/core/network/BUILD.bazel
@@ -4,6 +4,7 @@
     name = "network",
     srcs = [
         "main.go",
+        "quirks.go",
         "static.go",
     ],
     importpath = "source.monogon.dev/metropolis/node/core/network",
@@ -22,6 +23,7 @@
         "@com_github_google_nftables//:nftables",
         "@com_github_google_nftables//expr",
         "@com_github_insomniacslk_dhcp//dhcpv4",
+        "@com_github_mdlayher_ethtool//:ethtool",
         "@com_github_vishvananda_netlink//:netlink",
         "@org_golang_x_sys//unix",
     ],
diff --git a/metropolis/node/core/network/main.go b/metropolis/node/core/network/main.go
index 6532404..f420d55 100644
--- a/metropolis/node/core/network/main.go
+++ b/metropolis/node/core/network/main.go
@@ -220,6 +220,11 @@
 	if err := earlySysctlOpts.Apply(); err != nil {
 		logger.Fatalf("Error configuring early sysctl options: %v", err)
 	}
+
+	if err := applyQuirks(logger); err != nil {
+		logger.Errorf("Applying quirks failed, continuing without: %v", err)
+	}
+
 	// Choose between autoconfig and static config runnables
 	if s.StaticConfig == nil {
 		supervisor.Run(ctx, "dynamic", s.runDynamicConfig)
diff --git a/metropolis/node/core/network/quirks.go b/metropolis/node/core/network/quirks.go
new file mode 100644
index 0000000..6dd5808
--- /dev/null
+++ b/metropolis/node/core/network/quirks.go
@@ -0,0 +1,76 @@
+package network
+
+import (
+	"errors"
+	"fmt"
+	"strings"
+
+	"github.com/mdlayher/ethtool"
+	"github.com/vishvananda/netlink"
+	"golang.org/x/sys/unix"
+
+	"source.monogon.dev/metropolis/pkg/logtree"
+)
+
+// applyQuirks applies device- and/or driver-specific settings to make network
+// hardware work better (i.e. with fewer crashes or faster). It also logs the
+// firmware version of every interface reporting one. Per-interface failures
+// are only logged; an error is returned solely if the initial setup fails.
+func applyQuirks(l logtree.LeveledLogger) error {
+	ethtoolFd, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, unix.IPPROTO_IP)
+	if err != nil {
+		return fmt.Errorf("while creating IP socket for ethtool: %w", err)
+	}
+	defer unix.Close(ethtoolFd)
+	ethtoolC, err := ethtool.New()
+	if err != nil {
+		return fmt.Errorf("while getting ethtool netlink fd: %w", err)
+	}
+	defer ethtoolC.Close()
+	links, err := netlink.LinkList()
+	if err != nil {
+		return fmt.Errorf("while getting links for applying quirks: %w", err)
+	}
+	for _, link := range links {
+		linkinfo, err := unix.IoctlGetEthtoolDrvinfo(ethtoolFd, link.Attrs().Name)
+		if errors.Is(err, unix.EOPNOTSUPP) {
+			// Normally a software/virtual device, which never needs quirking.
+			continue
+		} else if err != nil {
+			l.Warningf("Unexpected error during ioctl(ETHTOOL_GDRVINFO) for device %q, skipping quirks: %v", link.Attrs().Name, err)
+			continue
+		}
+		driver := unix.ByteSliceToString(linkinfo.Driver[:])
+		firmwareVersion := strings.TrimSpace(unix.ByteSliceToString(linkinfo.Fw_version[:]))
+		opromVersion := strings.TrimSpace(unix.ByteSliceToString(linkinfo.Erom_version[:]))
+
+		// Log firmware version of all NICs which have one as we have currently
+		// no better way of accessing these. Exactly one line is logged per NIC.
+		if firmwareVersion != "" && opromVersion != "" {
+			l.Infof("Interface %q (driver %v) has firmware version %q with Option ROM version %q", link.Attrs().Name, driver, firmwareVersion, opromVersion)
+		} else if firmwareVersion != "" {
+			l.Infof("Interface %q (driver %v) has firmware version %q", link.Attrs().Name, driver, firmwareVersion)
+		}
+
+		switch driver {
+		case "i40e":
+			err := ethtoolC.SetPrivateFlags(ethtool.PrivateFlags{
+				Interface: ethtool.Interface{Index: link.Attrs().Index},
+				Flags: map[string]bool{
+					// Disable firmware-based LLDP processing as it both makes
+					// LLDP unavailable to the OS as well as being suspected of
+					// causing firmware crashes. Metropolis currently does not
+					// have DCB support anyway and if it gains such support it
+					// will process the LLDP packets for that in userspace.
+					"disable-fw-lldp": true,
+				},
+			})
+			if err != nil {
+				l.Warningf("Error when applying quirk for LLDP firmware processing to %q: %v", link.Attrs().Name, err)
+			} else {
+				l.Infof("Applied LLDP firmware processing quirk to %q", link.Attrs().Name)
+			}
+		}
+	}
+	return nil
+}