Add EROFS creation utility and Bazel rule

This adds a binary which can create EROFS filesystems from a spec and a Bazel rule similar to
node_initramfs which creates EROFS filesystems.

Test Plan: Tested in subsequent revision

X-Origin-Diff: phab/D695
GitOrigin-RevId: 4e8f970938c93606da46f475387de1e013b1c35c
diff --git a/metropolis/node/build/def.bzl b/metropolis/node/build/def.bzl
index 19074c2..39af192 100644
--- a/metropolis/node/build/def.bzl
+++ b/metropolis/node/build/def.bzl
@@ -255,3 +255,124 @@
         ),
     },
 )
+
+def _erofs_image_impl(ctx):
+    """
+    Generate an EROFS image (<name>.img) containing the configured files, extra directories and symlinks.
+    """
+
+    # Generate the filesystem spec (prototext, see mkerofs/fsspec) that describes the EROFS image to build.
+    fs_spec_name = ctx.label.name + ".prototxt"
+    fs_spec = ctx.actions.declare_file(fs_spec_name)
+
+    fs_files = []
+    inputs = []
+    for label, p in ctx.attr.files.items() + ctx.attr.files_cc.items():
+        if not p.startswith("/"):
+            fail("file {} invalid: must begin with /".format(p))
+
+        # Figure out if this is an executable.
+        is_executable = True
+
+        di = label[DefaultInfo]
+        if di.files_to_run.executable == None:
+            # Generated non-executable files will have DefaultInfo.files_to_run.executable == None
+            is_executable = False
+        elif di.files_to_run.executable.is_source:
+            # Source files will have executable.is_source == True
+            is_executable = False
+
+        # Ensure exactly one output is declared (zero outputs would otherwise crash on files[0] below).
+        # If you hit this error, figure out a better logic to find what file you need, maybe looking at providers other
+        # than DefaultInfo.
+        files = di.files.to_list()
+        if len(files) != 1:
+            fail("file {} must have exactly one output, got: {}".format(p, files))
+        src = files[0]
+        inputs.append(src)
+
+        mode = 0o555 if is_executable else 0o444
+        fs_files.append(struct(path = p, source_path = src.path, mode = mode, uid = 0, gid = 0))
+
+    fs_dirs = []
+    for p in ctx.attr.extra_dirs:
+        if not p.startswith("/"):
+            fail("directory {} invalid: must begin with /".format(p))
+
+        fs_dirs.append(struct(path = p, mode = 0o555, uid = 0, gid = 0))
+
+    fs_symlinks = []
+    for target, p in ctx.attr.symlinks.items():
+        fs_symlinks.append(struct(path = p, target_path = target))
+
+    fs_spec_content = struct(file = fs_files, directory = fs_dirs, symbolic_link = fs_symlinks)
+    ctx.actions.write(fs_spec, fs_spec_content.to_proto())
+
+    fs_name = ctx.label.name + ".img"
+    fs_out = ctx.actions.declare_file(fs_name)
+    ctx.actions.run(
+        outputs = [fs_out],
+        inputs = [fs_spec] + inputs,
+        tools = [ctx.executable._mkerofs],
+        executable = ctx.executable._mkerofs,
+        arguments = ["-out", fs_out.path, "-spec", fs_spec.path],
+    )
+
+    return [DefaultInfo(files = depset([fs_out]))]
+
+erofs_image = rule(
+    implementation = _erofs_image_impl,
+    doc = """
+        Build an EROFS. All files specified in files, files_cc and all specified symlinks will be contained.
+        Executable files will have their permissions set to 0555, non-executable files will have
+        their permissions set to 0444. All parent directories will be created with 0555 permissions.
+    """,
+    attrs = {
+        "files": attr.label_keyed_string_dict(
+            mandatory = True,
+            allow_files = True,
+            doc = """
+                Dictionary of Labels to String, placing a given Label's output file in the EROFS at the location
+                specified by the String value. The specified labels must only have a single output.
+            """,
+            # Attach pure transition to ensure all binaries added to the EROFS image are pure/static binaries.
+            cfg = build_pure_transition,
+        ),
+        "files_cc": attr.label_keyed_string_dict(
+            allow_files = True,
+            doc = """
+                 Special case of 'files' for compilation targets that need to be built with the musl toolchain like
+                 go_binary targets which need cgo or cc_binary targets.
+            """,
+            # Attach static transition to all files_cc inputs to ensure they are built with musl and static.
+            cfg = build_static_transition,
+        ),
+        "extra_dirs": attr.string_list(
+            default = [],
+            doc = """
+                Extra directories to create. These will be created in addition to all the directories required to
+                contain the files specified in the `files` attribute.
+            """,
+        ),
+        "symlinks": attr.string_dict(
+            default = {},
+            doc = """
+                Symbolic links to create. Similar format as in files and files_cc, so the target of the symlink is the
+                key and the value of it is the location of the symlink itself. Only raw strings are allowed as targets,
+                labels are not permitted. Include the file using files or files_cc, then symlink to its location.
+          """,
+        ),
+
+        # Tools, implicit dependencies. mkerofs is run as a build action by the rule implementation.
+        "_mkerofs": attr.label(
+            default = Label("//metropolis/node/build/mkerofs"),
+            executable = True,
+            cfg = "host",
+        ),
+
+        # Allow for transitions to be attached to this rule.
+        "_whitelist_function_transition": attr.label(
+            default = "@bazel_tools//tools/whitelists/function_transition_whitelist",
+        ),
+    },
+)
diff --git a/metropolis/node/build/mkerofs/BUILD.bazel b/metropolis/node/build/mkerofs/BUILD.bazel
new file mode 100644
index 0000000..6de1c73
--- /dev/null
+++ b/metropolis/node/build/mkerofs/BUILD.bazel
@@ -0,0 +1,19 @@
+load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
+
+go_library(
+    name = "go_default_library",
+    srcs = ["main.go"],
+    importpath = "source.monogon.dev/metropolis/node/build/mkerofs",
+    visibility = ["//visibility:public"],
+    deps = [
+        "//metropolis/node/build/mkerofs/fsspec:go_default_library",
+        "//metropolis/pkg/erofs:go_default_library",
+        "@com_github_golang_protobuf//proto:go_default_library",
+    ],
+)
+
+go_binary(
+    name = "mkerofs",
+    embed = [":go_default_library"],
+    visibility = ["//visibility:public"],
+)
diff --git a/metropolis/node/build/mkerofs/fsspec/BUILD.bazel b/metropolis/node/build/mkerofs/fsspec/BUILD.bazel
new file mode 100644
index 0000000..bd0f036
--- /dev/null
+++ b/metropolis/node/build/mkerofs/fsspec/BUILD.bazel
@@ -0,0 +1,30 @@
+load("@rules_proto//proto:defs.bzl", "proto_library")
+load("@io_bazel_rules_go//go:def.bzl", "go_library")
+load("@io_bazel_rules_go//proto:def.bzl", "go_proto_library")
+
+proto_library(
+    name = "spec_proto",
+    srcs = ["spec.proto"],
+    visibility = ["//visibility:public"],
+)
+
+go_proto_library(
+    name = "spec_go_proto",
+    importpath = "source.monogon.dev/metropolis/node/build/mkerofs/spec",
+    proto = ":spec_proto",
+    visibility = ["//visibility:public"],
+)
+
+go_library(
+    name = "go_default_library",
+    embed = [":fsspec_go_proto"],
+    importpath = "source.monogon.dev/metropolis/node/build/mkerofs/fsspec",
+    visibility = ["//visibility:public"],
+)
+
+go_proto_library(
+    name = "fsspec_go_proto",
+    importpath = "source.monogon.dev/metropolis/node/build/mkerofs/fsspec",
+    proto = ":spec_proto",
+    visibility = ["//visibility:public"],
+)
diff --git a/metropolis/node/build/mkerofs/fsspec/spec.proto b/metropolis/node/build/mkerofs/fsspec/spec.proto
new file mode 100644
index 0000000..3d6e8dc
--- /dev/null
+++ b/metropolis/node/build/mkerofs/fsspec/spec.proto
@@ -0,0 +1,70 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package metropolis.node.build.mkerofs.fsspec;
+option go_package = "source.monogon.dev/metropolis/node/build/mkerofs/fsspec";
+
+// FSSpec is the spec from which a filesystem is generated. It consists of files, directories and symbolic
+// links. Directories are also automatically inferred when required for the placement of files or symbolic
+// links. Inferred directories always have uid 0, gid 0 and permissions 0555. This can be overridden by
+// explicitly specifying a directory at a given path.
+message FSSpec {
+  repeated File file = 1;
+  repeated Directory directory = 2;
+  repeated SymbolicLink symbolic_link = 3;
+}
+
+// For internal use only. Represents all supported inodes in a oneof.
+message Inode {
+  oneof type {
+    File file = 1;
+    Directory directory = 2;
+    SymbolicLink symbolic_link = 3;
+  }
+}
+
+message File {
+  // The path where the file ends up in the filesystem.
+  string path = 1;
+  // The path on the host filesystem where the file contents should be taken from.
+  string source_path = 2;
+  // Unix permission bits
+  uint32 mode = 3;
+  // Owner uid
+  uint32 uid = 4;
+  // Owner gid
+  uint32 gid = 5;
+}
+
+message Directory {
+  // The path where the directory ends up in the filesystem.
+  string path = 1;
+  // Unix permission bits
+  uint32 mode = 2;
+  // Owner uid
+  uint32 uid = 3;
+  // Owner gid
+  uint32 gid = 4;
+}
+
+message SymbolicLink {
+  // The path where the symbolic link ends up in the filesystem.
+  string path = 1;
+  // The path which the symbolic link resolves to.
+  string target_path = 2;
+}
\ No newline at end of file
diff --git a/metropolis/node/build/mkerofs/main.go b/metropolis/node/build/mkerofs/main.go
new file mode 100644
index 0000000..a05e440
--- /dev/null
+++ b/metropolis/node/build/mkerofs/main.go
@@ -0,0 +1,179 @@
+// Copyright 2020 The Monogon Project Authors.
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// mkerofs takes a specification in the form of a prototext file (see fsspec next to this) and assembles an
+// EROFS filesystem according to it. The output is fully reproducible.
+package main
+
+import (
+	"flag"
+	"io"
+	"io/ioutil"
+	"log"
+	"os"
+	"path"
+	"sort"
+	"strings"
+
+	"github.com/golang/protobuf/proto"
+
+	"source.monogon.dev/metropolis/node/build/mkerofs/fsspec"
+	"source.monogon.dev/metropolis/pkg/erofs"
+)
+
+// writeRecursive writes the entry at pathname to w and, for directories, all entries below it in sorted order.
+func (s *entrySpec) writeRecursive(w *erofs.Writer, pathname string) {
+	if _, isDir := s.data.Type.(*fsspec.Inode_Directory); !isDir && len(s.children) > 0 {
+		log.Fatalf("%s is not a directory but the spec places entries below it", pathname)
+	}
+	switch inode := s.data.Type.(type) {
+	case *fsspec.Inode_Directory:
+		// Sort children for reproducibility, map iteration order is randomized.
+		var sortedChildren []string
+		for name := range s.children {
+			sortedChildren = append(sortedChildren, name)
+		}
+		sort.Strings(sortedChildren)
+		err := w.Create(pathname, &erofs.Directory{
+			Base: erofs.Base{
+				Permissions: uint16(inode.Directory.Mode),
+				UID:         uint16(inode.Directory.Uid),
+				GID:         uint16(inode.Directory.Gid),
+			},
+			Children: sortedChildren,
+		})
+		if err != nil {
+			log.Fatalf("failed to write directory %s: %s", pathname, err)
+		}
+		for _, name := range sortedChildren {
+			s.children[name].writeRecursive(w, path.Join(pathname, name))
+		}
+	case *fsspec.Inode_File:
+		iw := w.CreateFile(pathname, &erofs.FileMeta{
+			Base: erofs.Base{
+				Permissions: uint16(inode.File.Mode),
+				UID:         uint16(inode.File.Uid),
+				GID:         uint16(inode.File.Gid),
+			},
+		})
+		sourceFile, err := os.Open(inode.File.SourcePath)
+		if err != nil {
+			log.Fatalf("failed to open source file %s: %s", inode.File.SourcePath, err)
+		}
+		if _, err := io.Copy(iw, sourceFile); err != nil {
+			log.Fatalf("failed to copy file %s into filesystem: %s", inode.File.SourcePath, err)
+		}
+		sourceFile.Close()
+		if err := iw.Close(); err != nil {
+			log.Fatalf("failed to close target file %s: %s", pathname, err)
+		}
+	case *fsspec.Inode_SymbolicLink:
+		err := w.Create(pathname, &erofs.SymbolicLink{
+			Base:   erofs.Base{Permissions: 0777}, // Nominal, Linux forces that mode anyways, see symlink(7)
+			Target: inode.SymbolicLink.TargetPath,
+		})
+		if err != nil {
+			log.Fatalf("failed to create symbolic link %s: %s", pathname, err)
+		}
+	default:
+		log.Fatalf("unsupported inode type %T at %s", inode, pathname)
+	}
+}
+
+// entrySpec is a recursive structure representing the filesystem tree.
+type entrySpec struct {
+	data     fsspec.Inode          // inode (file, directory or symbolic link) stored at this node
+	children map[string]*entrySpec // entries directly below this node, keyed by their path element
+}
+
+// pathRef gets the entrySpec at the leaf of the given path (resolved against the root), inferring directories if necessary
+func (s *entrySpec) pathRef(p string) *entrySpec {
+	// Clean as an absolute path so that ".." cannot climb above the root. The root itself yields zero path parts.
+	var pathParts []string
+	if clean := strings.TrimPrefix(path.Clean("/"+p), "/"); clean != "" {
+		pathParts = strings.Split(clean, "/")
+	}
+
+	entryRef := s
+	for _, part := range pathParts {
+		childRef, ok := entryRef.children[part]
+		if !ok {
+			childRef = &entrySpec{
+				data:     fsspec.Inode{Type: &fsspec.Inode_Directory{Directory: &fsspec.Directory{Mode: 0555}}},
+				children: make(map[string]*entrySpec),
+			}
+			entryRef.children[part] = childRef
+		}
+		entryRef = childRef
+	}
+	return entryRef
+}
+
+var (
+	specPath = flag.String("spec", "", "Path to the filesystem specification (fsspec.FSSpec in prototext format)")
+	outPath  = flag.String("out", "", "Output file path")
+)
+
+// main reads the prototext spec from -spec, assembles the filesystem tree and writes it as an EROFS image to -out.
+func main() {
+	flag.Parse()
+	if *specPath == "" || *outPath == "" {
+		log.Fatal("both -spec and -out need to be set")
+	}
+	specRaw, err := ioutil.ReadFile(*specPath)
+	if err != nil {
+		log.Fatalf("failed to open spec: %v", err)
+	}
+
+	var spec fsspec.FSSpec
+	if err := proto.UnmarshalText(string(specRaw), &spec); err != nil {
+		log.Fatalf("failed to parse spec: %v", err)
+	}
+
+	fsRoot := &entrySpec{
+		data:     fsspec.Inode{Type: &fsspec.Inode_Directory{Directory: &fsspec.Directory{Mode: 0555}}},
+		children: make(map[string]*entrySpec),
+	}
+
+	// Entries sharing a path override each other: symbolic links win over files, files win over directories.
+	for _, dir := range spec.Directory {
+		fsRoot.pathRef(dir.Path).data.Type = &fsspec.Inode_Directory{Directory: dir}
+	}
+	for _, file := range spec.File {
+		fsRoot.pathRef(file.Path).data.Type = &fsspec.Inode_File{File: file}
+	}
+	for _, symlink := range spec.SymbolicLink {
+		fsRoot.pathRef(symlink.Path).data.Type = &fsspec.Inode_SymbolicLink{SymbolicLink: symlink}
+	}
+
+	fs, err := os.Create(*outPath)
+	if err != nil {
+		log.Fatalf("failed to open output file: %v", err)
+	}
+	writer, err := erofs.NewWriter(fs)
+	if err != nil {
+		log.Fatalf("failed to initialize EROFS writer: %v", err)
+	}
+
+	fsRoot.writeRecursive(writer, ".")
+
+	if err := writer.Close(); err != nil {
+		log.Fatalf("failed to finalize EROFS filesystem: %v", err)
+	}
+	if err := fs.Close(); err != nil {
+		log.Fatalf("failed to close output file: %v", err)
+	}
+}