m/n/build: implement new fsspec infrastructure

This makes the node_initramfs and erofs_image rules use the new common
fsspec infrastructure. It also adds an fsspecs attribute to both, which can
later be used to include arbitrary additional fsspecs.

Change-Id: I384e04712c0a70f82c5c975911cbb1d0d5e6cabc
Reviewed-on: https://review.monogon.dev/c/monogon/+/530
Reviewed-by: Sergiusz Bazanski <serge@monogon.tech>
diff --git a/metropolis/node/build/def.bzl b/metropolis/node/build/def.bzl
index 72e548b..c456a94 100644
--- a/metropolis/node/build/def.bzl
+++ b/metropolis/node/build/def.bzl
@@ -50,219 +50,21 @@
     ],
 )
 
-def _node_initramfs_impl(ctx):
-    """
-    Generate an lz4-compressed initramfs based on a label/file list.
-    """
-
-    # Generate config file for gen_init_cpio that describes the initramfs to build.
-    cpio_list_name = ctx.label.name + ".cpio_list"
-    cpio_list = ctx.actions.declare_file(cpio_list_name)
-
-    # Start out with some standard initramfs device files.
-    cpio_list_content = [
-        "dir /dev 0755 0 0",
-        "nod /dev/console 0600 0 0 c 5 1",
-        "nod /dev/null 0644 0 0 c 1 3",
-        "nod /dev/kmsg 0644 0 0 c 1 11",
-        "nod /dev/ptmx 0644 0 0 c 5 2",
-    ]
-
-    # Find all directories that need to be created.
-    directories_needed = []
-    for _, p in ctx.attr.files.items():
-        if not p.startswith("/"):
-            fail("file {} invalid: must begin with /".format(p))
-
-        # Get all intermediate directories on path to file
-        parts = p.split("/")[1:-1]
-        directories_needed.append(parts)
-
-    for _, p in ctx.attr.files_cc.items():
-        if not p.startswith("/"):
-            fail("file {} invalid: must begin with /".format(p))
-
-        # Get all intermediate directories on path to file
-        parts = p.split("/")[1:-1]
-        directories_needed.append(parts)
-
-    # Extend with extra directories defined by user.
-    for p in ctx.attr.extra_dirs:
-        if not p.startswith("/"):
-            fail("directory {} invalid: must begin with /".format(p))
-
-        parts = p.split("/")[1:]
-        directories_needed.append(parts)
-
-    directories = []
-    for parts in directories_needed:
-        # Turn directory parts [usr, local, bin] into successive subpaths [/usr, /usr/local, /usr/local/bin].
-        last = ""
-        for part in parts:
-            last += "/" + part
-
-            # TODO(q3k): this is slow - this should be a set instead, but starlark doesn't implement them.
-            # For the amount of files we're dealing with this doesn't matter, but all stars are pointing towards this
-            # becoming accidentally quadratic at some point in the future.
-            if last not in directories:
-                directories.append(last)
-
-    # Append instructions to create directories.
-    # Serendipitously, the directories should already be in the right order due to us not using a set to create the
-    # list. They might not be in an elegant order (ie, if files [/foo/one/one, /bar, /foo/two/two] are request, the
-    # order will be [/foo, /foo/one, /bar, /foo/two]), but that's fine.
-    for d in directories:
-        cpio_list_content.append("dir {} 0755 0 0".format(d))
-
-    # Append instructions to add files.
-    inputs = []
-    for label, p in ctx.attr.files.items():
-        # Figure out if this is an executable.
-        is_executable = True
-
-        di = label[DefaultInfo]
-        if di.files_to_run.executable == None:
-            # Generated non-executable files will have DefaultInfo.files_to_run.executable == None
-            is_executable = False
-        elif di.files_to_run.executable.is_source:
-            # Source files will have executable.is_source == True
-            is_executable = False
-
-        # Ensure only single output is declared.
-        # If you hit this error, figure out a better logic to find what file you need, maybe looking at providers other
-        # than DefaultInfo.
-        files = di.files.to_list()
-        if len(files) > 1:
-            fail("file {} has more than one output: {}", p, files)
-        src = files[0]
-        inputs.append(src)
-
-        mode = "0755" if is_executable else "0444"
-
-        cpio_list_content.append("file {} {} {} 0 0".format(p, src.path, mode))
-
-    for label, p in ctx.attr.files_cc.items():
-        # Figure out if this is an executable.
-        is_executable = True
-
-        di = label[DefaultInfo]
-        if di.files_to_run.executable == None:
-            # Generated non-executable files will have DefaultInfo.files_to_run.executable == None
-            is_executable = False
-        elif di.files_to_run.executable.is_source:
-            # Source files will have executable.is_source == True
-            is_executable = False
-
-        # Ensure only single output is declared.
-        # If you hit this error, figure out a better logic to find what file you need, maybe looking at providers other
-        # than DefaultInfo.
-        files = di.files.to_list()
-        if len(files) > 1:
-            fail("file {} has more than one output: {}", p, files)
-        src = files[0]
-        inputs.append(src)
-
-        mode = "0755" if is_executable else "0444"
-
-        cpio_list_content.append("file {} {} {} 0 0".format(p, src.path, mode))
-
-    # Write cpio_list.
-    ctx.actions.write(cpio_list, "\n".join(cpio_list_content))
-
-    gen_init_cpio = ctx.executable._gen_init_cpio
-    savestdout = ctx.executable._savestdout
-    lz4 = ctx.executable._lz4
-
-    # Generate 'raw' (uncompressed) initramfs
-    initramfs_raw_name = ctx.label.name
-    initramfs_raw = ctx.actions.declare_file(initramfs_raw_name)
-    ctx.actions.run(
-        outputs = [initramfs_raw],
-        inputs = [cpio_list] + inputs,
-        tools = [savestdout, gen_init_cpio],
-        executable = savestdout,
-        arguments = [initramfs_raw.path, gen_init_cpio.path, cpio_list.path],
-    )
-
-    # Compress raw initramfs using lz4c.
-    initramfs_name = ctx.label.name + ".lz4"
-    initramfs = ctx.actions.declare_file(initramfs_name)
-    ctx.actions.run(
-        outputs = [initramfs],
-        inputs = [initramfs_raw],
-        tools = [savestdout, lz4],
-        executable = lz4.path,
-        arguments = ["-l", initramfs_raw.path, initramfs.path],
-    )
-
-    # TODO(q3k): Document why this is needed
-    return [DefaultInfo(runfiles = ctx.runfiles(files = [initramfs]), files = depset([initramfs]))]
-
-node_initramfs = rule(
-    implementation = _node_initramfs_impl,
-    doc = """
-        Build a node initramfs. The initramfs will contain a basic /dev directory and all the files specified by the
-        `files` attribute. Executable files will have their permissions set to 0755, non-executable files will have
-        their permissions set to 0444. All parent directories will be created with 0755 permissions.
-    """,
-    attrs = {
-        "files": attr.label_keyed_string_dict(
-            mandatory = True,
-            allow_files = True,
-            doc = """
-                Dictionary of Labels to String, placing a given Label's output file in the initramfs at the location
-                specified by the String value. The specified labels must only have a single output.
-            """,
-            # Attach pure transition to ensure all binaries added to the initramfs are pure/static binaries.
-            cfg = build_pure_transition,
-        ),
-        "files_cc": attr.label_keyed_string_dict(
-            allow_files = True,
-            doc = """
-                 Special case of 'files' for compilation targets that need to be built with the musl toolchain like
-                 go_binary targets which need cgo or cc_binary targets.
-            """,
-            # Attach static transition to all files_cc inputs to ensure they are built with musl and static.
-            cfg = build_static_transition,
-        ),
-        "extra_dirs": attr.string_list(
-            default = [],
-            doc = """
-                Extra directories to create. These will be created in addition to all the directories required to
-                contain the files specified in the `files` attribute.
-            """,
-        ),
-
-        # Tools, implicit dependencies.
-        "_gen_init_cpio": attr.label(
-            default = Label("@linux//:gen_init_cpio"),
-            executable = True,
-            cfg = "host",
-        ),
-        "_lz4": attr.label(
-            default = Label("@com_github_lz4_lz4//programs:lz4"),
-            executable = True,
-            cfg = "host",
-        ),
-        "_savestdout": attr.label(
-            default = Label("//build/savestdout"),
-            executable = True,
-            cfg = "host",
-        ),
-
-        # Allow for transitions to be attached to this rule.
-        "_whitelist_function_transition": attr.label(
-            default = "@bazel_tools//tools/whitelists/function_transition_whitelist",
-        ),
+FSSpecInfo = provider(
+    "Provides parts of an FSSpec used to assemble filesystem images",
+    fields = {
+        "spec": "File containing the partial FSSpec as prototext",
+        "referenced": "Files (potentially) referenced by the spec",
     },
 )
 
-def _erofs_image_impl(ctx):
+def _fsspec_core_impl(ctx, tool, output_file, builtin_fsspec):
     """
-    Generate an EROFS filesystem based on a label/file list.
+    _fsspec_core_impl implements the core of an fsspec-based rule. It takes
+    input from the `files`,`files_cc`, `extra_dirs`, `symlinks` and `fsspecs`
+    attributes and calls `tool` with the `-out` parameter pointing to
+    `output_file` and paths to all fsspecs as positional arguments.
     """
-
-    # Generate config file for gen_init_cpio that describes the initramfs to build.
     fs_spec_name = ctx.label.name + ".prototxt"
     fs_spec = ctx.actions.declare_file(fs_spec_name)
 
@@ -307,17 +109,119 @@
         fs_symlinks.append(struct(path = p, target_path = target))
 
     fs_spec_content = struct(file = fs_files, directory = fs_dirs, symbolic_link = fs_symlinks)
-    ctx.actions.write(fs_spec, fs_spec_content.to_proto())
+    ctx.actions.write(fs_spec, proto.encode_text(fs_spec_content))
 
+    extra_specs = []
+    if builtin_fsspec != None:
+        builtin_fsspec_file = ctx.actions.declare_file(ctx.label.name + "-builtin.prototxt")
+        ctx.actions.write(builtin_fsspec_file, proto.encode_text(builtin_fsspec))
+        extra_specs.append(builtin_fsspec_file)
+
+    for fsspec in ctx.attr.fsspecs:
+        fsspecInfo = fsspec[FSSpecInfo]
+        extra_specs.append(fsspecInfo.spec)
+        for f in fsspecInfo.referenced:
+            inputs.append(f)
+
+    ctx.actions.run(
+        outputs = [output_file],
+        inputs = [fs_spec] + inputs + extra_specs,
+        tools = [tool],
+        executable = tool,
+        arguments = ["-out", output_file.path, fs_spec.path] + [s.path for s in extra_specs],
+    )
+    return
+
+def _node_initramfs_impl(ctx):
+    """Implements node_initramfs: emits `<name>.cpio.lz4` built by mkcpio."""
+    archive = ctx.actions.declare_file(ctx.label.name + ".cpio.lz4")
+
+    # Linux needs at least /dev/console and /dev/null to boot an init from an
+    # initramfs. /dev/kmsg and /dev/ptmx are added as well since they might be
+    # needed before a proper device manager is launched.
+    device_nodes = [
+        struct(path = "/dev/console", mode = 0o600, major = 5, minor = 1),
+        struct(path = "/dev/ptmx", mode = 0o644, major = 5, minor = 2),
+        struct(path = "/dev/null", mode = 0o644, major = 1, minor = 3),
+        struct(path = "/dev/kmsg", mode = 0o644, major = 1, minor = 11),
+    ]
+    builtin_fsspec = struct(special_file = device_nodes)
+    _fsspec_core_impl(ctx, ctx.executable._mkcpio, archive, builtin_fsspec)
+
+    # TODO(q3k): Document why this is needed
+    outputs = [archive]
+    return [DefaultInfo(runfiles = ctx.runfiles(files = outputs), files = depset(outputs))]
+
+node_initramfs = rule(
+    implementation = _node_initramfs_impl,
+    doc = """
+        Build a node initramfs. The initramfs will contain a basic /dev directory and all the files specified by the
+        `files` attribute. Executable files will have their permissions set to 0755, non-executable files will have
+        their permissions set to 0444. All parent directories will be created with 0755 permissions.
+    """,
+    attrs = {
+        "files": attr.label_keyed_string_dict(
+            mandatory = True,
+            allow_files = True,
+            doc = """
+                Dictionary of Labels to String, placing a given Label's output file in the initramfs at the location
+                specified by the String value. The specified labels must only have a single output.
+            """,
+            # Attach pure transition to ensure all binaries added to the initramfs are pure/static binaries.
+            cfg = build_pure_transition,
+        ),
+        "files_cc": attr.label_keyed_string_dict(
+            allow_files = True,
+            doc = """
+                 Special case of 'files' for compilation targets that need to be built with the musl toolchain like
+                 go_binary targets which need cgo or cc_binary targets.
+            """,
+            # Attach static transition to all files_cc inputs to ensure they are built with musl and static.
+            cfg = build_static_transition,
+        ),
+        "extra_dirs": attr.string_list(
+            default = [],
+            doc = """
+                Extra directories to create. These will be created in addition to all the directories required to
+                contain the files specified in the `files` attribute.
+            """,
+        ),
+        "symlinks": attr.string_dict(
+            default = {},
+            doc = """
+                Symbolic links to create. Similar format as in files and files_cc, so the target of the symlink is the
+                key and the value of it is the location of the symlink itself. Only raw strings are allowed as targets,
+                labels are not permitted. Include the file using files or files_cc, then symlink to its location.
+            """,
+        ),
+        "fsspecs": attr.label_list(
+            default = [],
+            doc = """
+                List of file system specs (metropolis.node.build.fsspec.FSSpec) to also include in the resulting image.
+                These will be merged with all other given attributes.
+            """,
+            providers = [FSSpecInfo],
+        ),
+
+        # Tool, implicit dependency: mkcpio is the executable handed to _fsspec_core_impl to produce the output.
+        "_mkcpio": attr.label(
+            default = Label("//metropolis/node/build/mkcpio"),
+            executable = True,
+            cfg = "exec",
+        ),
+
+        # Allow for transitions (used by `files` and `files_cc` above) to be attached to this rule.
+        "_whitelist_function_transition": attr.label(
+            default = "@bazel_tools//tools/whitelists/function_transition_whitelist",
+        ),
+    },
+)
+
+def _erofs_image_impl(ctx):
     fs_name = ctx.label.name + ".img"
     fs_out = ctx.actions.declare_file(fs_name)
-    ctx.actions.run(
-        outputs = [fs_out],
-        inputs = [fs_spec] + inputs,
-        tools = [ctx.executable._mkerofs],
-        executable = ctx.executable._mkerofs,
-        arguments = ["-out", fs_out.path, "-spec", fs_spec.path],
-    )
+
+    _fsspec_core_impl(ctx, ctx.executable._mkerofs, fs_out, None)
 
     return [DefaultInfo(files = depset([fs_out]))]
 
@@ -363,6 +267,14 @@
                 labels are not permitted. Include the file using files or files_cc, then symlink to its location.
           """,
         ),
+        "fsspecs": attr.label_list(
+            default = [],
+            doc = """
+                List of file system specs (metropolis.node.build.fsspec.FSSpec) to also include in the resulting image.
+                These will be merged with all other given attributes.
+            """,
+            providers = [FSSpecInfo],
+        ),
 
         # Tools, implicit dependencies.
         "_mkerofs": attr.label(
@@ -381,72 +293,72 @@
 # VerityConfig is emitted by verity_image, and contains a file enclosing a
 # singular dm-verity target table.
 VerityConfig = provider(
-  "Configuration necessary to mount a single dm-verity target.",
-  fields = {
-    "table": "A file containing the dm-verity target table. See: https://www.kernel.org/doc/html/latest/admin-guide/device-mapper/verity.html",
-  },
+    "Configuration necessary to mount a single dm-verity target.",
+    fields = {
+        "table": "A file containing the dm-verity target table. See: https://www.kernel.org/doc/html/latest/admin-guide/device-mapper/verity.html",
+    },
 )
 
 def _verity_image_impl(ctx):
-  """
-  Create a new file containing the source image data together with the Verity
-  metadata appended to it, and provide an associated DeviceMapper Verity target
-  table in a separate file, through VerityConfig provider.
-  """
+    """
+    Create a new file containing the source image data together with the Verity
+    metadata appended to it, and provide an associated DeviceMapper Verity target
+    table in a separate file, through VerityConfig provider.
+    """
 
-  # Run mkverity.
-  image = ctx.actions.declare_file(ctx.attr.name + ".img")
-  table = ctx.actions.declare_file(ctx.attr.name + ".dmt")
-  ctx.actions.run(
-    mnemonic = "GenVerityImage",
-    progress_message = "Generating a dm-verity image",
-    inputs = [ctx.file.source],
-    outputs = [
-      image,
-      table,
-    ],
-    executable = ctx.file._mkverity,
-    arguments = [
-      "-input=" + ctx.file.source.path,
-      "-output=" + image.path,
-      "-table=" + table.path,
-      "-data_alias=" + ctx.attr.rootfs_partlabel,
-      "-hash_alias=" + ctx.attr.rootfs_partlabel,
-    ]
-  )
-
-  return [
-    DefaultInfo(
-      files=depset([image]),
-      runfiles=ctx.runfiles(files=[image])
-    ),
-    VerityConfig(
-      table = table
+    # Run mkverity.
+    image = ctx.actions.declare_file(ctx.attr.name + ".img")
+    table = ctx.actions.declare_file(ctx.attr.name + ".dmt")
+    ctx.actions.run(
+        mnemonic = "GenVerityImage",
+        progress_message = "Generating a dm-verity image",
+        inputs = [ctx.file.source],
+        outputs = [
+            image,
+            table,
+        ],
+        executable = ctx.file._mkverity,
+        arguments = [
+            "-input=" + ctx.file.source.path,
+            "-output=" + image.path,
+            "-table=" + table.path,
+            "-data_alias=" + ctx.attr.rootfs_partlabel,
+            "-hash_alias=" + ctx.attr.rootfs_partlabel,
+        ],
     )
-  ]
+
+    return [
+        DefaultInfo(
+            files = depset([image]),
+            runfiles = ctx.runfiles(files = [image]),
+        ),
+        VerityConfig(
+            table = table,
+        ),
+    ]
 
 verity_image = rule(
-  implementation = _verity_image_impl,
-  doc = """
+    implementation = _verity_image_impl,
+    doc = """
       Build a dm-verity target image by appending Verity metadata to the source
       image. A corresponding dm-verity target table will be made available
       through VerityConfig provider.
   """,
-  attrs = {
-    "source": attr.label(
-      doc = "A source image.",
-      allow_single_file = True,
-    ),
-    "rootfs_partlabel": attr.string(
-      doc = "GPT partition label of the rootfs to be used with dm-mod.create.",
-      default = "PARTLABEL=METROPOLIS-SYSTEM",
-    ),
-    "_mkverity": attr.label(
-      doc = "The mkverity executable needed to generate the image.",
-      default = "//metropolis/node/build/mkverity",
-      allow_single_file = True,
-      executable = True,
-      cfg = "host",
-    ),
-  },
+    attrs = {
+        "source": attr.label(
+            doc = "A source image.",
+            allow_single_file = True,
+        ),
+        "rootfs_partlabel": attr.string(
+            doc = "GPT partition label of the rootfs to be used with dm-mod.create.",
+            default = "PARTLABEL=METROPOLIS-SYSTEM",
+        ),
+        "_mkverity": attr.label(
+            doc = "The mkverity executable needed to generate the image.",
+            default = "//metropolis/node/build/mkverity",
+            allow_single_file = True,
+            executable = True,
+            cfg = "host",
+        ),
+    },
 )
diff --git a/metropolis/node/build/fsspec/BUILD.bazel b/metropolis/node/build/fsspec/BUILD.bazel
index 3a65d97..e0ce66d 100644
--- a/metropolis/node/build/fsspec/BUILD.bazel
+++ b/metropolis/node/build/fsspec/BUILD.bazel
@@ -10,9 +10,11 @@
 
 go_library(
     name = "go_default_library",
+    srcs = ["utils.go"],
     embed = [":fsspec_go_proto"],
     importpath = "source.monogon.dev/metropolis/node/build/fsspec",
     visibility = ["//visibility:public"],
+    deps = ["@com_github_golang_protobuf//proto:go_default_library"],
 )
 
 go_proto_library(
diff --git a/metropolis/node/build/fsspec/utils.go b/metropolis/node/build/fsspec/utils.go
new file mode 100644
index 0000000..2438220
--- /dev/null
+++ b/metropolis/node/build/fsspec/utils.go
@@ -0,0 +1,38 @@
+package fsspec
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/golang/protobuf/proto"
+)
+
+// ReadMergeSpecs reads a text-format FSSpec from every file in paths and
+// merges them into a single FSSpec.
+//
+// The File, Directory, SymbolicLink and SpecialFile entries of all specs are
+// concatenated in the order the paths are given. No deduplication or
+// conflict detection is performed here.
+//
+// An empty paths slice yields an empty FSSpec. If any file cannot be read or
+// parsed, a nil FSSpec and the wrapped error are returned and no further
+// files are processed.
+func ReadMergeSpecs(paths []string) (*FSSpec, error) {
+	var mergedSpec FSSpec
+	for _, p := range paths {
+		specRaw, err := os.ReadFile(p)
+		if err != nil {
+			return nil, fmt.Errorf("failed to open spec: %w", err)
+		}
+
+		var spec FSSpec
+		if err := proto.UnmarshalText(string(specRaw), &spec); err != nil {
+			return nil, fmt.Errorf("failed to parse spec %q: %w", p, err)
+		}
+		mergedSpec.File = append(mergedSpec.File, spec.File...)
+		mergedSpec.Directory = append(mergedSpec.Directory, spec.Directory...)
+		mergedSpec.SymbolicLink = append(mergedSpec.SymbolicLink, spec.SymbolicLink...)
+		mergedSpec.SpecialFile = append(mergedSpec.SpecialFile, spec.SpecialFile...)
+	}
+	return &mergedSpec, nil
+}
diff --git a/metropolis/node/build/mkcpio/BUILD.bazel b/metropolis/node/build/mkcpio/BUILD.bazel
new file mode 100644
index 0000000..5a93d3a
--- /dev/null
+++ b/metropolis/node/build/mkcpio/BUILD.bazel
@@ -0,0 +1,20 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["main.go"],
+    importpath = "source.monogon.dev/metropolis/node/build/mkcpio",
+    visibility = ["//visibility:private"],
+    deps = [
+        "//metropolis/node/build/fsspec:go_default_library",
+        "@com_github_cavaliergopher_cpio//:go_default_library",
+        "@com_github_pierrec_lz4_v4//:go_default_library",
+        "@org_golang_x_sys//unix:go_default_library",
+    ],
+)
+
+go_binary(
+    name = "mkcpio",
+    embed = [":go_default_library"],
+    visibility = ["//visibility:public"],
+)
diff --git a/metropolis/node/build/mkcpio/main.go b/metropolis/node/build/mkcpio/main.go
new file mode 100644
index 0000000..10deb5a
--- /dev/null
+++ b/metropolis/node/build/mkcpio/main.go
@@ -0,0 +1,213 @@
+package main
+
+import (
+	"flag"
+	"io"
+	"log"
+	"os"
+	"path"
+	"sort"
+	"strings"
+
+	"github.com/cavaliergopher/cpio"
+	"github.com/pierrec/lz4/v4"
+	"golang.org/x/sys/unix"
+
+	"source.monogon.dev/metropolis/node/build/fsspec"
+)
+
+var (
+	outPath = flag.String("out", "", "Output file path")
+)
+
+type placeEnum int
+
+const (
+	// placeNone implies that currently nothing is placed at that path.
+	// Can be overridden by everything.
+	placeNone placeEnum = 0
+	// placeDirImplicit means that there is currently an implied directory
+	// at the given path. It can be overridden by (and only by) an explicit
+	// directory.
+	placeDirImplicit placeEnum = 1
+	// placeDirExplicit means that there is an explicit (i.e. specified by
+	// the FSSpec) directory at the given path. Nothing else can override
+	// this.
+	placeDirExplicit placeEnum = 2
+	// placeNonDir means that there is a file-type resource (i.e. a file,
+	// symlink or special_file) at the given path. Nothing can override this.
+	placeNonDir placeEnum = 3
+)
+
+// place represents the state a given canonical path is in during metadata
+// construction. Its zero value is { State: placeNone, Inode: nil }.
+type place struct {
+	State placeEnum
+	// Inode contains one of the types inside an FSSpec (e.g. *fsspec.File)
+	Inode interface{}
+}
+
+// main merges all FSSpecs given as positional arguments and writes them out
+// as a cpio archive, LZ4-compressed with the legacy option, to -out.
+//
+// Usage: -out <out-path.cpio.lz4> fsspec-path...
+func main() {
+	flag.Parse()
+	outFile, err := os.Create(*outPath)
+	if err != nil {
+		log.Fatalf("Failed to open CPIO output file: %v", err)
+	}
+	// Writers are closed explicitly at the end of main: log.Fatalf skips defers,
+	// and a failed Close means a truncated archive that must fail the build.
+	compressedOut := lz4.NewWriter(outFile)
+	if err := compressedOut.Apply(lz4.LegacyOption(true)); err != nil {
+		log.Fatalf("Failed to configure LZ4 writer: %v", err)
+	}
+	cpioWriter := cpio.NewWriter(compressedOut)
+
+	spec, err := fsspec.ReadMergeSpecs(flag.Args())
+	if err != nil {
+		log.Fatalf("failed to load specs: %v", err)
+	}
+
+	// Map of paths to metadata for validation & implicit directory injection
+	places := make(map[string]place)
+
+	// The idea behind this machinery is that we try to place all files and
+	// directories into a map while creating the required parent directories
+	// on-the-fly as implicit directories. Overriding an implicit directory
+	// with an explicit one is allowed thus the actual order in which this
+	// structure is created does not matter. All non-directories cannot be
+	// overridden anyways so their insertion order does not matter.
+	// This also has the job of validating the FSSpec structure, ensuring that
+	// there are no duplicate paths and that there is nothing placed below a
+	// non-directory.
+	var placeInode func(p string, isDir bool, inode interface{})
+	placeInode = func(p string, isDir bool, inode interface{}) {
+		cleanPath := path.Clean(p)
+		if !isDir {
+			if places[cleanPath].State != placeNone {
+				log.Fatalf("Invalid FSSpec: Duplicate Inode at %q", cleanPath)
+			}
+			places[cleanPath] = place{State: placeNonDir, Inode: inode}
+		} else {
+			switch places[cleanPath].State {
+			case placeNone:
+				if inode != nil {
+					places[cleanPath] = place{State: placeDirExplicit, Inode: inode}
+				} else {
+					implicit := &fsspec.Directory{Path: cleanPath, Mode: 0555}
+					places[cleanPath] = place{State: placeDirImplicit, Inode: implicit}
+				}
+			case placeDirImplicit:
+				if inode != nil {
+					places[cleanPath] = place{State: placeDirExplicit, Inode: inode}
+				}
+			case placeDirExplicit:
+				if inode != nil {
+					log.Fatalf("Invalid FSSpec: Conflicting explicit directories at %v", cleanPath)
+				}
+			case placeNonDir:
+				log.Fatalf("Invalid FSSpec: Trying to place inode below non-directory at %q", cleanPath)
+			default:
+				panic("unhandled placeEnum value")
+			}
+		}
+		parentPath, _ := path.Split(p)
+		parentPath = path.Clean(parentPath)
+		if parentPath == "/" || parentPath == p {
+			return
+		}
+		placeInode(parentPath, true, nil)
+	}
+	for _, d := range spec.Directory {
+		placeInode(d.Path, true, d)
+	}
+	for _, f := range spec.File {
+		placeInode(f.Path, false, f)
+	}
+	for _, s := range spec.SymbolicLink {
+		placeInode(s.Path, false, s)
+	}
+	for _, s := range spec.SpecialFile {
+		placeInode(s.Path, false, s)
+	}
+
+	writeOrder := make([]string, 0, len(places))
+	for p := range places {
+		writeOrder = append(writeOrder, p)
+	}
+	// Sorting a list of normalized paths representing a tree gives us Depth-
+	// first search (DFS) order which is the correct order for writing archives.
+	// This also makes the output reproducible.
+	sort.Strings(writeOrder)
+
+	for _, p := range writeOrder {
+		switch i := places[p].Inode.(type) {
+		case *fsspec.File:
+			inF, err := os.Open(i.SourcePath)
+			if err != nil {
+				log.Fatalf("Failed to open source path for file %q: %v", i.Path, err)
+			}
+			inFStat, err := inF.Stat()
+			if err != nil {
+				log.Fatalf("Failed to stat source path for file %q: %v", i.Path, err)
+			}
+			if err := cpioWriter.WriteHeader(&cpio.Header{
+				Mode: cpio.FileMode(i.Mode),
+				Name: strings.TrimPrefix(i.Path, "/"),
+				Size: inFStat.Size(),
+			}); err != nil {
+				log.Fatalf("Failed to write cpio header for file %q: %v", i.Path, err)
+			}
+			if _, err := io.Copy(cpioWriter, inF); err != nil {
+				log.Fatalf("Failed to copy file %q into cpio: %v", i.SourcePath, err)
+			}
+			inF.Close()
+		case *fsspec.Directory:
+			if err := cpioWriter.WriteHeader(&cpio.Header{
+				Mode: cpio.FileMode(i.Mode) | cpio.ModeDir,
+				Name: strings.TrimPrefix(i.Path, "/"),
+			}); err != nil {
+				log.Fatalf("Failed to write cpio header for directory %q: %v", i.Path, err)
+			}
+		case *fsspec.SymbolicLink:
+			if err := cpioWriter.WriteHeader(&cpio.Header{
+				// Symlinks are 0777 by definition (from man 7 symlink on Linux)
+				Mode:     0777 | cpio.ModeSymlink,
+				Name:     strings.TrimPrefix(i.Path, "/"),
+				Linkname: i.TargetPath,
+			}); err != nil {
+				log.Fatalf("Failed to write cpio header for symlink %q: %v", i.Path, err)
+			}
+		case *fsspec.SpecialFile:
+			mode := cpio.FileMode(i.Mode)
+			switch i.Type {
+			case fsspec.SpecialFile_CHARACTER_DEV:
+				mode |= cpio.ModeCharDevice
+			case fsspec.SpecialFile_BLOCK_DEV:
+				mode |= cpio.ModeDevice
+			case fsspec.SpecialFile_FIFO:
+				mode |= cpio.ModeNamedPipe
+			}
+
+			if err := cpioWriter.WriteHeader(&cpio.Header{
+				Mode:     mode,
+				Name:     strings.TrimPrefix(i.Path, "/"),
+				DeviceID: int(unix.Mkdev(i.Major, i.Minor)),
+			}); err != nil {
+				log.Fatalf("Failed to write CPIO header for special file %q: %v", i.Path, err)
+			}
+		default:
+			panic("inode type not handled")
+		}
+	}
+
+	// Close innermost-first so that each writer can hand its remaining data
+	// to the one below it before that one is closed in turn.
+	for _, c := range []io.Closer{cpioWriter, compressedOut, outFile} {
+		if err := c.Close(); err != nil {
+			log.Fatalf("Failed to finalize CPIO output file: %v", err)
+		}
+	}
+}
diff --git a/metropolis/node/build/mkerofs/BUILD.bazel b/metropolis/node/build/mkerofs/BUILD.bazel
index 3cbcbde..43e2f5c 100644
--- a/metropolis/node/build/mkerofs/BUILD.bazel
+++ b/metropolis/node/build/mkerofs/BUILD.bazel
@@ -8,7 +8,6 @@
     deps = [
         "//metropolis/node/build/fsspec:go_default_library",
         "//metropolis/pkg/erofs:go_default_library",
-        "@com_github_golang_protobuf//proto:go_default_library",
     ],
 )
 
diff --git a/metropolis/node/build/mkerofs/main.go b/metropolis/node/build/mkerofs/main.go
index 651096b..0d35eff 100644
--- a/metropolis/node/build/mkerofs/main.go
+++ b/metropolis/node/build/mkerofs/main.go
@@ -28,8 +28,6 @@
 	"sort"
 	"strings"
 
-	"github.com/golang/protobuf/proto"
-
 	"source.monogon.dev/metropolis/node/build/fsspec"
 	"source.monogon.dev/metropolis/pkg/erofs"
 )
@@ -125,20 +123,15 @@
 }
 
 var (
-	specPath = flag.String("spec", "", "Path to the filesystem specification (spec.FSSpec)")
-	outPath  = flag.String("out", "", "Output file path")
+	outPath = flag.String("out", "", "Output file path")
 )
 
 func main() {
 	flag.Parse()
-	specRaw, err := os.ReadFile(*specPath)
-	if err != nil {
-		log.Fatalf("failed to open spec: %v", err)
-	}
 
-	var spec fsspec.FSSpec
-	if err := proto.UnmarshalText(string(specRaw), &spec); err != nil {
-		log.Fatalf("failed to parse spec: %v", err)
+	spec, err := fsspec.ReadMergeSpecs(flag.Args())
+	if err != nil {
+		log.Fatalf("failed to load specs: %v", err)
 	}
 
 	var fsRoot = &entrySpec{