m/n/b/fwprune: adapt to fsspec and use

This modifies the fwprune tool to generate fsspecs instead of making
copies and makes it take a list of paths for suffix matching instead
of a directory as input. It also adds the fsspec_linux_firmware rule
which uses the utility to actually build a partial fsspec. Finally it
integrates the linux-firmware external repository and uses that rule
to ship firmware in Metropolis.

Change-Id: I0552995105eda84e63d7259040ad36d794079308
Reviewed-on: https://review.monogon.dev/c/monogon/+/534
Reviewed-by: Mateusz Zalega <mateusz@monogon.tech>
diff --git a/metropolis/build/fwprune/BUILD.bazel b/metropolis/build/fwprune/BUILD.bazel
deleted file mode 100644
index 55aa5c5..0000000
--- a/metropolis/build/fwprune/BUILD.bazel
+++ /dev/null
@@ -1,14 +0,0 @@
-load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
-
-go_library(
-    name = "go_default_library",
-    srcs = ["main.go"],
-    importpath = "source.monogon.dev/metropolis/build/fwprune",
-    visibility = ["//visibility:private"],
-)
-
-go_binary(
-    name = "fwprune",
-    embed = [":go_default_library"],
-    visibility = ["//visibility:public"],
-)
diff --git a/metropolis/build/fwprune/main.go b/metropolis/build/fwprune/main.go
deleted file mode 100644
index 5791b9e..0000000
--- a/metropolis/build/fwprune/main.go
+++ /dev/null
@@ -1,138 +0,0 @@
-// fwprune is a buildsystem utility that filters linux-firmware repository
-// contents to include only files required by the built-in kernel modules,
-// that are specified in modules.builtin.modinfo.
-// (see: https://www.kernel.org/doc/Documentation/kbuild/kbuild.txt)
-package main
-
-import (
-	"bytes"
-	"fmt"
-	"io"
-	"log"
-	"os"
-	"path/filepath"
-	"sort"
-	"strings"
-)
-
-// fwPaths returns a slice of filesystem paths relative to the root of the
-// linux-firmware repository, pointing at firmware files, according to contents
-// of the kernel build side effect: modules.builtin.modinfo.
-func fwPaths(mi []byte) []string {
-	// Use a map pset to deduplicate firmware paths.
-	pset := make(map[string]bool)
-	// Get a slice of entries of the form "unix.license=GPL" from mi. Then extract
-	// firmware information from it.
-	entries := bytes.Split(mi, []byte{0})
-	for _, entry := range entries {
-		// Skip empty entries.
-		if len(entry) == 0 {
-			continue
-		}
-		// Parse the entries. Split each entry into a key-value pair, separated
-		// by "=".
-		kv := strings.SplitN(string(entry), "=", 2)
-		key, value := kv[0], kv[1]
-		// Split the key into a module.attribute] pair, such as "unix.license".
-		ma := strings.SplitN(key, ".", 2)
-		// Skip, if it's not a firmware entry, according to the attribute.
-		if ma[1] != "firmware" {
-			continue
-		}
-		// If it is though, value holds a firmware path.
-		pset[value] = true
-	}
-	// Convert the deduplicated pset to a slice.
-	pslice := make([]string, 0, len(pset))
-	for p, _ := range pset {
-		pslice = append(pslice, p)
-	}
-	sort.Strings(pslice)
-	return pslice
-}
-
-// fwDirs returns a slice of filesystem paths relative to the root of
-// linux-firmware repository, pointing at directories that need to exist before
-// files specified by fwp paths can be created.
-func fwDirs(fwp []string) []string {
-	// Use a map dset to deduplicate directory paths.
-	dset := make(map[string]bool)
-	for _, p := range fwp {
-		dp := filepath.Dir(p)
-		dset[dp] = true
-	}
-	// Convert dset to a slice.
-	dslice := make([]string, 0, len(dset))
-	for d, _ := range dset {
-		dslice = append(dslice, d)
-	}
-	sort.Strings(dslice)
-	return dslice
-}
-
-// copyFile copies a file at filesystem path src to dst. dst must not point to
-// an existing file. It may return an IO error.
-func copyFile(dst, src string) error {
-	i, err := os.Open(src)
-	if err != nil {
-		return err
-	}
-	defer i.Close()
-
-	o, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE, 0770)
-	if err != nil {
-		return err
-	}
-	defer o.Close()
-
-	if _, err := io.Copy(o, i); err != nil {
-		return err
-	}
-	return nil
-}
-
-func main() {
-	// The directory at fwdst will be filled with firmware required by the kernel
-	// builtins specified in modules.builtin.modinfo [1]. fwsrc must point to the
-	// linux-firmware repository [2]. All parameters must be filesystem paths. The
-	// necessary parts of the original directory layout will be recreated at fwdst.
-	// fwprune will output a list of directories and files it creates.
-	// [1] https://www.kernel.org/doc/Documentation/kbuild/kbuild.txt
-	// [2] https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git
-	if len(os.Args) != 4 {
-		// Print usage information, if misused.
-		fmt.Println("Usage: fwprune modules.builtin.modinfo fwsrc fwdst")
-		os.Exit(1)
-	}
-	modinfo := os.Args[1]
-	fwsrc := os.Args[2]
-	fwdst := os.Args[3]
-
-	// Get the firmware file paths.
-	mi, err := os.ReadFile(modinfo)
-	if err != nil {
-		log.Fatalf("While reading modinfo: %v", err)
-	}
-	fwp := fwPaths(mi)
-
-	// Recreate the necessary parts of the linux-firmware directory tree.
-	fwd := fwDirs(fwp)
-	for _, rd := range fwd {
-		d := filepath.Join(fwdst, rd)
-		if err := os.MkdirAll(d, 0770); err != nil {
-			log.Fatalf("Couldn't create a subdirectory: %v", err)
-		}
-		fmt.Println(d)
-	}
-
-	// Copy the files specified by fwp.
-	for _, p := range fwp {
-		dst := filepath.Join(fwdst, p)
-		src := filepath.Join(fwsrc, p)
-
-		if err := copyFile(dst, src); err != nil {
-			log.Fatalf("Couldn't provide %q: %v", dst, err)
-		}
-		fmt.Println(p)
-	}
-}
diff --git a/metropolis/node/BUILD.bazel b/metropolis/node/BUILD.bazel
index 722f05b..72a8e11 100644
--- a/metropolis/node/BUILD.bazel
+++ b/metropolis/node/BUILD.bazel
@@ -1,6 +1,7 @@
 load("@io_bazel_rules_go//go:def.bzl", "go_library")
 load("//metropolis/node/build:def.bzl", "erofs_image", "verity_image")
 load("//metropolis/node/build:efi.bzl", "efi_unified_kernel_image")
+load("//metropolis/node/build/fwprune:def.bzl", "fsspec_linux_firmware")
 load("@rules_pkg//:pkg.bzl", "pkg_zip")
 
 go_library(
@@ -21,6 +22,12 @@
     },
 )
 
+fsspec_linux_firmware(
+    name = "firmware",
+    firmware_files = ["@linux-firmware//:all_files"],
+    kernel = "//third_party/linux",
+)
+
 erofs_image(
     name = "rootfs",
     extra_dirs = [
@@ -86,6 +93,7 @@
         "@xfsprogs//:mkfs": "/bin/mkfs.xfs",
         "@chrony//:chrony": "/time/chrony",
     },
+    fsspecs = [":firmware"],
     symlinks = {
         "/ephemeral/machine-id": "/etc/machine-id",
         "/ephemeral/hosts": "/etc/hosts",
diff --git a/metropolis/node/build/fwprune/BUILD.bazel b/metropolis/node/build/fwprune/BUILD.bazel
new file mode 100644
index 0000000..18e83b1
--- /dev/null
+++ b/metropolis/node/build/fwprune/BUILD.bazel
@@ -0,0 +1,18 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["main.go"],
+    importpath = "source.monogon.dev/metropolis/node/build/fwprune",
+    visibility = ["//visibility:private"],
+    deps = [
+        "//metropolis/node/build/fsspec:go_default_library",
+        "@org_golang_google_protobuf//encoding/prototext:go_default_library",
+    ],
+)
+
+go_binary(
+    name = "fwprune",
+    embed = [":go_default_library"],
+    visibility = ["//visibility:public"],
+)
diff --git a/metropolis/node/build/fwprune/def.bzl b/metropolis/node/build/fwprune/def.bzl
new file mode 100644
index 0000000..b43b1d1
--- /dev/null
+++ b/metropolis/node/build/fwprune/def.bzl
@@ -0,0 +1,52 @@
+load("//metropolis/node/build:def.bzl", "FSSpecInfo")
+
+def _fsspec_linux_firmware(ctx):
+    fsspec_out = ctx.actions.declare_file(ctx.label.name + ".prototxt")
+    # Write the paths of all candidate firmware files, one per line, for fwprune.
+    fwlist = ctx.actions.declare_file(ctx.label.name + "-fwlist.txt")
+    ctx.actions.write(
+        output = fwlist,
+        content = "\n".join([f.path for f in ctx.files.firmware_files]),
+    )
+    # Use the first file of the kernel target's "modinfo" output group.
+    modinfo = ctx.attr.kernel[OutputGroupInfo].modinfo.to_list()[0]
+    # Run fwprune with: modinfo path, firmware list path, output spec path.
+    ctx.actions.run(
+        outputs = [fsspec_out],
+        inputs = [fwlist, modinfo] + ctx.files.firmware_files,
+        tools = [ctx.executable._fwprune],
+        executable = ctx.executable._fwprune,
+        arguments = [modinfo.path, fwlist.path, fsspec_out.path],
+    )
+
+    return [DefaultInfo(files = depset([fsspec_out])), FSSpecInfo(spec = fsspec_out, referenced = ctx.files.firmware_files)]
+
+fsspec_linux_firmware = rule(
+    implementation = _fsspec_linux_firmware,
+    doc = """
+         Generates a partial filesystem spec containing all firmware files required by a given kernel at the
+         default firmware load path (/lib/firmware).
+    """,
+    attrs = {
+        "firmware_files": attr.label_list(
+            mandatory = True,
+            allow_files = True,
+            doc = """
+               List of firmware files. Generally at least a filegroup of the linux-firmware repository should
+               be in here.
+            """,
+        ),
+        "kernel": attr.label(
+            mandatory = True,
+            doc = """
+                Kernel for which firmware should be selected. Needs to have a modinfo OutputGroup.
+            """,
+        ),
+        # Tool: the fwprune binary, built for the execution platform.
+        "_fwprune": attr.label(
+            default = Label("//metropolis/node/build/fwprune"),
+            executable = True,
+            cfg = "exec",
+        ),
+    },
+)
diff --git a/metropolis/node/build/fwprune/main.go b/metropolis/node/build/fwprune/main.go
new file mode 100644
index 0000000..6ad2a93
--- /dev/null
+++ b/metropolis/node/build/fwprune/main.go
@@ -0,0 +1,116 @@
+// fwprune is a buildsystem utility that filters linux-firmware repository
+// contents to include only files required by the built-in kernel modules,
+// that are specified in modules.builtin.modinfo.
+// (see: https://www.kernel.org/doc/Documentation/kbuild/kbuild.txt)
+package main
+
+import (
+	"bytes"
+	"log"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+
+	"google.golang.org/protobuf/encoding/prototext"
+
+	"source.monogon.dev/metropolis/node/build/fsspec"
+)
+
+// fwPaths returns a sorted, deduplicated slice of firmware file paths, relative
+// to the root of the linux-firmware repository, requested by the built-in
+// modules described in mi, the contents of modules.builtin.modinfo.
+func fwPaths(mi []byte) []string {
+	// Use a map pset to deduplicate firmware paths.
+	pset := make(map[string]bool)
+	// mi is a sequence of NUL-separated entries of the form
+	// "unix.license=GPL". Extract firmware information from them.
+	for _, entry := range bytes.Split(mi, []byte{0}) {
+		// Split each entry into a key-value pair, separated by "=". Empty
+		// entries and malformed entries lacking "=" are skipped instead of
+		// causing an out-of-range panic.
+		kv := strings.SplitN(string(entry), "=", 2)
+		if len(kv) != 2 {
+			continue
+		}
+		// Split the key into a module.attribute pair, such as "unix.license".
+		// Only "firmware" attributes are of interest; keys lacking "." are
+		// skipped as well.
+		ma := strings.SplitN(kv[0], ".", 2)
+		if len(ma) != 2 || ma[1] != "firmware" {
+			continue
+		}
+		// The value of a firmware entry holds a firmware path.
+		pset[kv[1]] = true
+	}
+	// Convert the deduplicated pset to a slice.
+	pslice := make([]string, 0, len(pset))
+	for p := range pset {
+		pslice = append(pslice, p)
+	}
+	// Sort so that the output is deterministic despite map iteration order.
+	sort.Strings(pslice)
+	return pslice
+}
+
+// main reads a kernel modinfo file and extracts the list of all firmware files
+// requested by the modules described in it. It then reads all available
+// firmware file paths (newline-separated in the firmwareList file) and tries
+// to match each requested path as a suffix of them. For example, if a module
+// requests firmware foo/bar.bin and the firmware list contains a file at
+// build-out/x/y/foo/bar.bin, that file is used. Finally, it writes a
+// text-format fsspec placing each file at its requested path under /lib/firmware.
+func main() {
+	if len(os.Args) != 4 {
+		log.Fatal("Usage: fwprune modules.builtin.modinfo firmwareListPath outSpec")
+	}
+	modinfo := os.Args[1]
+	firmwareListPath := os.Args[2]
+	outSpec := os.Args[3]
+
+	allFirmwareData, err := os.ReadFile(firmwareListPath)
+	if err != nil {
+		log.Fatalf("Failed to read firmware source list: %v", err)
+	}
+
+	// Create a look-up table of all possible path suffixes to their full paths
+	// as a single map lookup per requested file is much faster than calling
+	// strings.HasSuffix for every combination. On collision the last path wins.
+	suffixLUT := make(map[string]string)
+	for _, firmwarePath := range strings.Split(string(allFirmwareData), "\n") {
+		pathParts := strings.Split(firmwarePath, string(os.PathSeparator))
+		for i := range pathParts {
+			suffixLUT[filepath.Join(pathParts[i:]...)] = firmwarePath
+		}
+	}
+
+	// Get the firmware file paths used by modules according to modinfo data
+	mi, err := os.ReadFile(modinfo)
+	if err != nil {
+		log.Fatalf("While reading modinfo: %v", err)
+	}
+
+	var files []*fsspec.File
+	for _, p := range fwPaths(mi) {
+		sourcePath := suffixLUT[p]
+		if sourcePath == "" {
+			// This should not be fatal as sometimes linux-firmware cannot
+			// ship all firmware usable by the kernel for mostly legal reasons.
+			log.Printf("WARNING: Requested firmware %q not found", p)
+			continue
+		}
+		files = append(files, &fsspec.File{
+			Path:       filepath.Join("/lib/firmware", p),
+			Mode:       0444,
+			SourcePath: sourcePath,
+		})
+	}
+	// Fail on marshaling errors rather than writing out a bogus spec.
+	fsspecRaw, err := prototext.Marshal(&fsspec.FSSpec{File: files})
+	if err != nil {
+		log.Fatalf("failed marshaling fsspec: %v", err)
+	}
+	if err := os.WriteFile(outSpec, fsspecRaw, 0644); err != nil {
+		log.Fatalf("failed writing output: %v", err)
+	}
+}