m/test/launch: build image at runtime

Test launch now builds the node disk image from the OCI image, instead
of creating a qcow2 snapshot of the pre-built disk image. This speeds up
tests and test cluster launch.

The OCI image is uncompressed and payloads are not verified, which
enables the previously implemented copy_file_range optimization. If the
host file system supports reflinks, this has an effect similar to that
of the previous qcow2 snapshot: building the image is very fast because
the rootfs data is not copied on disk. On my machine, it takes 30 ms.

The build before launching a cluster is now faster: the MkImage step,
which took 6 s, is replaced by MkOCI, which takes 1 s. The majority of
this time is spent by Bazel computing hashes of files. For MkImage, the
generated file was a 5 GB disk image consisting mostly of zeroes, which
took a long time to hash.

Additionally, the qcow2 layer added some overhead, which is now gone.
The HA e2e test previously took 103 s on my machine, now it takes 80 s.

Change-Id: I0ce5059626cc682061c26ac3c8d11b752e641c60
Reviewed-on: https://review.monogon.dev/c/monogon/+/4294
Tested-by: Jenkins CI
Reviewed-by: Tim Windelschmidt <tim@monogon.tech>
diff --git a/metropolis/node/BUILD.bazel b/metropolis/node/BUILD.bazel
index 60368ff..ff24cca 100644
--- a/metropolis/node/BUILD.bazel
+++ b/metropolis/node/BUILD.bazel
@@ -140,6 +140,17 @@
     visibility = ["//visibility:public"],
 )
 
+oci_os_image(
+    name = "oci_image_uncompressed",
+    srcs = {
+        "system": ":verity_rootfs",
+        "kernel.efi": ":kernel_efi",
+    },
+    compression_level = 0,
+    product_info = ":product_info",
+    visibility = ["//metropolis/test/launch:__pkg__"],
+)
+
 # An intermediary "bundle" format until we finalize the actual bundle format. This is NOT stable until migrated
 # to the actual bundle format.
 # TODO(lorenz): Replace this
diff --git a/metropolis/test/launch/BUILD.bazel b/metropolis/test/launch/BUILD.bazel
index 492fdd4..bbe3eb9 100644
--- a/metropolis/test/launch/BUILD.bazel
+++ b/metropolis/test/launch/BUILD.bazel
@@ -13,7 +13,8 @@
     ],
     data = [
         "//metropolis/cli/metroctl:metroctl_lite",
-        "//metropolis/node:image",
+        "//metropolis/node:oci_image_uncompressed",
+        "//metropolis/node/abloader",
         "//metropolis/test/nanoswitch:initramfs",
         "//metropolis/test/swtpm/certtool",
         "//metropolis/test/swtpm/swtpm_cert",
@@ -37,11 +38,11 @@
         "xOvmfCodePath": "$(rlocationpath //third_party/edk2:OVMF_CODE.fd )",
         "xKernelPath": "$(rlocationpath //osbase/test/ktest:linux-testing )",
         "xInitramfsPath": "$(rlocationpath //metropolis/test/nanoswitch:initramfs )",
-        "xNodeImagePath": "$(rlocationpath //metropolis/node:image )",
+        "xNodeImagePath": "$(rlocationpath //metropolis/node:oci_image_uncompressed )",
+        "xAbloaderPath": "$(rlocationpath //metropolis/node/abloader )",
     },
     deps = [
         "//go/logging",
-        "//go/qcow2",
         "//metropolis/cli/metroctl/core",
         "//metropolis/node",
         "//metropolis/node/core/curator/proto/api",
@@ -49,8 +50,13 @@
         "//metropolis/node/core/rpc/resolver",
         "//metropolis/proto/api",
         "//metropolis/proto/common",
+        "//osbase/blockdev",
+        "//osbase/build/mkimage/osimage",
         "//osbase/logbuffer",
+        "//osbase/oci",
+        "//osbase/oci/osimage",
         "//osbase/oci/registry",
+        "//osbase/structfs",
         "//osbase/test/qemu",
         "@com_github_cenkalti_backoff_v4//:backoff",
         "@com_github_kballard_go_shellquote//:go-shellquote",
diff --git a/metropolis/test/launch/cluster.go b/metropolis/test/launch/cluster.go
index 5ad4121..1e3f42b 100644
--- a/metropolis/test/launch/cluster.go
+++ b/metropolis/test/launch/cluster.go
@@ -46,12 +46,16 @@
 	cpb "source.monogon.dev/metropolis/proto/common"
 
 	"source.monogon.dev/go/logging"
-	"source.monogon.dev/go/qcow2"
 	metroctl "source.monogon.dev/metropolis/cli/metroctl/core"
 	"source.monogon.dev/metropolis/node"
 	"source.monogon.dev/metropolis/node/core/rpc"
 	"source.monogon.dev/metropolis/node/core/rpc/resolver"
+	"source.monogon.dev/osbase/blockdev"
+	"source.monogon.dev/osbase/build/mkimage/osimage"
+	"source.monogon.dev/osbase/oci"
+	ociosimage "source.monogon.dev/osbase/oci/osimage"
 	"source.monogon.dev/osbase/oci/registry"
+	"source.monogon.dev/osbase/structfs"
 	"source.monogon.dev/osbase/test/qemu"
 )
 
@@ -161,23 +165,53 @@
 		return nil, fmt.Errorf("failed to create the state directory: %w", err)
 	}
 
-	// Initialize the node's storage with a prebuilt image.
-	st, err := os.Stat(xNodeImagePath)
+	// Initialize the node's storage.
+	ociImage, err := oci.ReadLayout(xNodeImagePath)
 	if err != nil {
-		return nil, fmt.Errorf("cannot read image file: %w", err)
+		return nil, fmt.Errorf("failed to read OS image: %w", err)
 	}
-	diskBytes = max(diskBytes, uint64(st.Size()))
+	osImage, err := ociosimage.Read(ociImage)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read OS image: %w", err)
+	}
 
-	di := filepath.Join(stdp, "image.qcow2")
-	logf("Cluster: generating node QCOW2 snapshot image: %s -> %s", xNodeImagePath, di)
+	efiPayload, err := osImage.PayloadUnverified("kernel.efi")
+	if err != nil {
+		return nil, fmt.Errorf("cannot open EFI payload in OS image: %w", err)
+	}
+	systemImage, err := osImage.PayloadUnverified("system")
+	if err != nil {
+		return nil, fmt.Errorf("cannot open system image in OS image: %w", err)
+	}
 
-	df, err := os.Create(di)
+	abloader, err := structfs.OSPathBlob(xAbloaderPath)
+	if err != nil {
+		return nil, fmt.Errorf("cannot open abloader: %w", err)
+	}
+
+	di := filepath.Join(stdp, "image.img")
+	logf("Cluster: generating node image: %s -> %s", xNodeImagePath, di)
+
+	df, err := blockdev.CreateFile(di, 512, int64(diskBytes/512))
 	if err != nil {
 		return nil, fmt.Errorf("while opening image for writing: %w", err)
 	}
 	defer df.Close()
-	if err := qcow2.Generate(df, qcow2.GenerateWithBackingFile(xNodeImagePath), qcow2.GenerateWithFileSize(diskBytes)); err != nil {
-		return nil, fmt.Errorf("while creating copy-on-write node image: %w", err)
+
+	installParams := &osimage.Params{
+		PartitionSize: osimage.PartitionSizeInfo{
+			ESP:    128,
+			System: 1024,
+			Data:   128,
+		},
+		Architecture: osImage.Config.ProductInfo.Architecture(),
+		SystemImage:  systemImage,
+		EFIPayload:   efiPayload,
+		ABLoader:     abloader,
+		Output:       df,
+	}
+	if _, err := osimage.Write(installParams); err != nil {
+		return nil, fmt.Errorf("while creating node image: %w", err)
 	}
 
 	// Initialize the OVMF firmware variables file.
@@ -249,6 +283,9 @@
 	if options.MemoryMiB == 0 {
 		options.MemoryMiB = 2048
 	}
+	if options.DiskBytes == 0 {
+		options.DiskBytes = 5 * 1024 * 1024 * 1024
+	}
 
 	// If it's the node's first start, set up its runtime directories.
 	if options.Runtime == nil {
@@ -295,7 +332,7 @@
 
 	tpmSocketPath := filepath.Join(r.sd, "tpm-socket")
 	fwVarPath := filepath.Join(r.ld, "OVMF_VARS.fd")
-	storagePath := filepath.Join(r.ld, "image.qcow2")
+	storagePath := filepath.Join(r.ld, "image.img")
 	qemuArgs := []string{
 		"-machine", "q35",
 		"-accel", "kvm",
@@ -306,7 +343,7 @@
 		"-smp", fmt.Sprintf("cores=%d,threads=%d", options.CPUs, options.ThreadsPerCPU),
 		"-drive", "if=pflash,format=raw,readonly=on,file=" + xOvmfCodePath,
 		"-drive", "if=pflash,format=raw,file=" + fwVarPath,
-		"-drive", "if=virtio,format=qcow2,cache=unsafe,file=" + storagePath,
+		"-drive", "if=virtio,format=raw,cache=unsafe,file=" + storagePath,
 		"-netdev", qemuNetConfig.ToOption(qemuNetType),
 		"-device", "virtio-net-pci,netdev=net0,mac=" + options.Mac.String(),
 		"-chardev", "socket,id=chrtpm,path=" + tpmSocketPath,
diff --git a/metropolis/test/launch/launch.go b/metropolis/test/launch/launch.go
index 759bf28..3864eb2 100644
--- a/metropolis/test/launch/launch.go
+++ b/metropolis/test/launch/launch.go
@@ -22,6 +22,7 @@
 	xKernelPath       string
 	xInitramfsPath    string
 	xNodeImagePath    string
+	xAbloaderPath     string
 )
 
 func init() {
@@ -30,7 +31,7 @@
 		&xSwtpmPath, &xSwtpmSetupPath, &xSwtpmLocalCAPath,
 		&xSwtpmCertPath, &xCerttoolPath, &xMetroctlPath,
 		&xOvmfCodePath, &xOvmfVarsPath, &xKernelPath,
-		&xInitramfsPath, &xNodeImagePath,
+		&xInitramfsPath, &xNodeImagePath, &xAbloaderPath,
 	} {
 		*path, err = runfiles.Rlocation(*path)
 		if err != nil {
diff --git a/osbase/oci/osimage/osimage.go b/osbase/oci/osimage/osimage.go
index 30a88ae..ac9721a 100644
--- a/osbase/oci/osimage/osimage.go
+++ b/osbase/oci/osimage/osimage.go
@@ -100,6 +100,21 @@
 	return nil, fmt.Errorf("payload %q not found", name)
 }
 
+// PayloadUnverified returns the payload of the given name without verifying
+// it against hashes. It fails unless the layer is MediaTypePayloadUncompressed.
+func (i *Image) PayloadUnverified(name string) (structfs.Blob, error) {
+	for pi, info := range i.Config.Payloads {
+		if info.Name == name {
+			layer := &i.image.Manifest.Layers[pi] // payload pi is looked up as layer pi
+			if layer.MediaType != MediaTypePayloadUncompressed {
+				return nil, fmt.Errorf("unsupported media type %q for unverified payload", layer.MediaType)
+			}
+			return i.image.StructfsBlob(layer), nil
+		}
+	}
+	return nil, fmt.Errorf("payload %q not found", name) // no payload entry matched name
+}
+
 type payloadBlob struct {
 	raw       structfs.Blob
 	mediaType string