m/n/b/fwprune: adapt to fsspec and use

This modifies the fwprune tool to generate fsspecs instead of making
copies and makes it take a list of paths for suffix matching instead
of a directory as input. It also adds the fsspec_linux_firmware rule
which uses the utility to actually build a partial fsspec. Finally it
integrates the linux-firmware external repository and uses that rule
to ship firmware in Metropolis.

Change-Id: I0552995105eda84e63d7259040ad36d794079308
Reviewed-on: https://review.monogon.dev/c/monogon/+/534
Reviewed-by: Mateusz Zalega <mateusz@monogon.tech>
diff --git a/metropolis/node/BUILD.bazel b/metropolis/node/BUILD.bazel
index 722f05b..72a8e11 100644
--- a/metropolis/node/BUILD.bazel
+++ b/metropolis/node/BUILD.bazel
@@ -1,6 +1,7 @@
 load("@io_bazel_rules_go//go:def.bzl", "go_library")
 load("//metropolis/node/build:def.bzl", "erofs_image", "verity_image")
 load("//metropolis/node/build:efi.bzl", "efi_unified_kernel_image")
+load("//metropolis/node/build/fwprune:def.bzl", "fsspec_linux_firmware")
 load("@rules_pkg//:pkg.bzl", "pkg_zip")
 
 go_library(
@@ -21,6 +22,12 @@
     },
 )
 
+fsspec_linux_firmware(
+    name = "firmware",
+    firmware_files = ["@linux-firmware//:all_files"],
+    kernel = "//third_party/linux",
+)
+
 erofs_image(
     name = "rootfs",
     extra_dirs = [
@@ -86,6 +93,7 @@
         "@xfsprogs//:mkfs": "/bin/mkfs.xfs",
         "@chrony//:chrony": "/time/chrony",
     },
+    fsspecs = [":firmware"],
     symlinks = {
         "/ephemeral/machine-id": "/etc/machine-id",
         "/ephemeral/hosts": "/etc/hosts",
diff --git a/metropolis/node/build/fwprune/BUILD.bazel b/metropolis/node/build/fwprune/BUILD.bazel
new file mode 100644
index 0000000..18e83b1
--- /dev/null
+++ b/metropolis/node/build/fwprune/BUILD.bazel
@@ -0,0 +1,18 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["main.go"],
+    importpath = "source.monogon.dev/metropolis/node/build/fwprune",
+    visibility = ["//visibility:private"],
+    deps = [
+        "//metropolis/node/build/fsspec:go_default_library",
+        "@org_golang_google_protobuf//encoding/prototext:go_default_library",
+    ],
+)
+
+go_binary(
+    name = "fwprune",
+    embed = [":go_default_library"],
+    visibility = ["//visibility:public"],
+)
diff --git a/metropolis/node/build/fwprune/def.bzl b/metropolis/node/build/fwprune/def.bzl
new file mode 100644
index 0000000..b43b1d1
--- /dev/null
+++ b/metropolis/node/build/fwprune/def.bzl
@@ -0,0 +1,52 @@
+load("//metropolis/node/build:def.bzl", "FSSpecInfo")
+
+def _fsspec_linux_firmware(ctx):
+    """Emits <name>.prototxt, an fsspec built by fwprune from the kernel modinfo."""
+    firmware = ctx.files.firmware_files
+    spec = ctx.actions.declare_file(ctx.label.name + ".prototxt")
+
+    # fwprune reads the candidate firmware paths from a newline-separated list.
+    listing = ctx.actions.declare_file(ctx.label.name + "-fwlist.txt")
+    ctx.actions.write(output = listing, content = "\n".join([f.path for f in firmware]))
+
+    # Only the first file of the kernel's modinfo output group is used.
+    modinfo = ctx.attr.kernel[OutputGroupInfo].modinfo.to_list()[0]
+    ctx.actions.run(
+        outputs = [spec],
+        inputs = [listing, modinfo] + firmware,
+        tools = [ctx.executable._fwprune],
+        executable = ctx.executable._fwprune,
+        arguments = [modinfo.path, listing.path, spec.path],
+    )
+
+    return [DefaultInfo(files = depset([spec])), FSSpecInfo(spec = spec, referenced = firmware)]
+
+fsspec_linux_firmware = rule(
+    implementation = _fsspec_linux_firmware,
+    doc = """
+         Generates a partial filesystem spec containing all firmware files required by a given kernel at the
+         default firmware load path (/lib/firmware).
+    """,
+    attrs = {
+        "firmware_files": attr.label_list(
+            mandatory = True,
+            allow_files = True,
+            doc = """
+               List of firmware files. Generally at least a filegroup of the linux-firmware repository should
+               be in here.
+            """,
+        ),
+        "kernel": attr.label(
+            doc = """
+                Kernel for which firmware should be selected. Needs to have a modinfo OutputGroup.
+            """,
+        ),
+
+        # Implicit dependency on the fwprune tool, resolved in the exec configuration.
+        "_fwprune": attr.label(
+            default = Label("//metropolis/node/build/fwprune"),
+            executable = True,
+            cfg = "exec",
+        ),
+    },
+)
diff --git a/metropolis/node/build/fwprune/main.go b/metropolis/node/build/fwprune/main.go
new file mode 100644
index 0000000..6ad2a93
--- /dev/null
+++ b/metropolis/node/build/fwprune/main.go
@@ -0,0 +1,116 @@
+// fwprune is a buildsystem utility that selects the firmware files required by
+// the built-in kernel modules listed in modules.builtin.modinfo and emits an
+// fsspec placing them under /lib/firmware.
+// (see: https://www.kernel.org/doc/Documentation/kbuild/kbuild.txt)
+package main
+
+import (
+	"bytes"
+	"log"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+
+	"google.golang.org/protobuf/encoding/prototext"
+
+	"source.monogon.dev/metropolis/node/build/fsspec"
+)
+
+// fwPaths returns a sorted, deduplicated slice of filesystem paths relative to
+// the root of the linux-firmware repository, pointing at the firmware files
+// requested by mi, the contents of modules.builtin.modinfo.
+func fwPaths(mi []byte) []string {
+	// Use a map pset to deduplicate firmware paths.
+	pset := make(map[string]bool)
+	// mi consists of NUL-separated entries of the form "unix.license=GPL".
+	for _, entry := range bytes.Split(mi, []byte{0}) {
+		// Skip empty entries.
+		if len(entry) == 0 {
+			continue
+		}
+		// Split each entry into a key-value pair, separated by "=". Entries
+		// without a separator are malformed and skipped rather than indexed.
+		kv := strings.SplitN(string(entry), "=", 2)
+		if len(kv) != 2 {
+			continue
+		}
+		// Split the key into a [module, attribute] pair, such as
+		// "unix.license". Only "firmware" attributes are of interest.
+		ma := strings.SplitN(kv[0], ".", 2)
+		if len(ma) != 2 || ma[1] != "firmware" {
+			continue
+		}
+		// The value of a firmware entry is the requested firmware path.
+		pset[kv[1]] = true
+	}
+	// Convert the deduplicated pset to a sorted slice for stable output.
+	pslice := make([]string, 0, len(pset))
+	for p := range pset {
+		pslice = append(pslice, p)
+	}
+	sort.Strings(pslice)
+	return pslice
+}
+
+// fwprune takes a modinfo file from the kernel and extracts a list of all
+// firmware files requested by all modules in that file. It then takes all
+// available firmware file paths (newline-separated in the firmwareList file)
+// and tries to match the requested file paths as a suffix of them.
+// For example if a module requests firmware foo/bar.bin and in the firmware list
+// there is a file at path build-out/x/y/foo/bar.bin it will use that file.
+// Finally it generates an fsspec placing each file under its requested path
+// under /lib/firmware. Any I/O or marshaling failure terminates the process.
+func main() {
+	if len(os.Args) != 4 {
+		log.Fatal("Usage: fwprune modules.builtin.modinfo firmwareListPath outSpec")
+	}
+	modinfo, firmwareListPath, outSpec := os.Args[1], os.Args[2], os.Args[3]
+
+	allFirmwareData, err := os.ReadFile(firmwareListPath)
+	if err != nil {
+		log.Fatalf("Failed to read firmware source list: %v", err)
+	}
+	allFirmwarePaths := strings.Split(string(allFirmwareData), "\n")
+
+	// Create a look-up table of all possible suffixes to their full paths as
+	// this is much faster at O(n) than calling strings.HasSuffix for every
+	// possible combination which is O(n^2).
+	suffixLUT := make(map[string]string)
+	for _, firmwarePath := range allFirmwarePaths {
+		pathParts := strings.Split(firmwarePath, string(os.PathSeparator))
+		for i := range pathParts {
+			suffixLUT[filepath.Join(pathParts[i:]...)] = firmwarePath
+		}
+	}
+
+	// Get the firmware file paths used by modules according to modinfo data
+	mi, err := os.ReadFile(modinfo)
+	if err != nil {
+		log.Fatalf("While reading modinfo: %v", err)
+	}
+
+	var files []*fsspec.File
+	for _, p := range fwPaths(mi) {
+		sourcePath := suffixLUT[p]
+		if sourcePath == "" {
+			// This should not be fatal as sometimes linux-firmware cannot
+			// ship all firmware usable by the kernel for mostly legal reasons.
+			log.Printf("WARNING: Requested firmware %q not found", p)
+			continue
+		}
+		files = append(files, &fsspec.File{
+			Path:       filepath.Join("/lib/firmware", p),
+			Mode:       0444,
+			SourcePath: sourcePath,
+		})
+	}
+	// Check the marshal error so a failure never writes a bogus spec.
+	fsspecRaw, err := prototext.Marshal(&fsspec.FSSpec{File: files})
+	if err != nil {
+		log.Fatalf("failed marshaling fsspec: %v", err)
+	}
+	if err := os.WriteFile(outSpec, fsspecRaw, 0644); err != nil {
+		log.Fatalf("failed writing output: %v", err)
+	}
+}