treewide: stop using LZ4 for initrd compression
There are two issues at play here: One is a bug in pierrec/lz4 when
using the legacy framing format [1]. This bit us when we hit a broken
size region with CL:2130, taking hours to debug.
The other is the fact that the Linux LZ4 frame format has significant
design issues [2], especially with concatenanted initrds.
The first issue could be fixed by switching to a different LZ4
implementation (we do even have the reference impl in the monorepo) but
there is no API to generate the legacy frame format and things like [3],
a patch carried by Ubuntu to fix more edge cases just do not inspire
confidence in such a solution.
Thus, this CL switches over to using zstd for compressing initrds.
Zstd is slower than LZ4 for decompressing, but it still decompresses at
multiple GB/s per core while having a much better compression ratio.
It also doesn't have any Linux-specific bits and Linux uses the
reference implementation for decoding, which should make it much more
robust. So overall I think this is a good tradeoff.
[1] https://github.com/pierrec/lz4/issues/156
[2] https://github.com/lz4/lz4/issues/956#issuecomment-736705712
[3] https://launchpadlibrarian.net/507407918/0001-unlz4-Handle-0-size-chunks-discard-trailing-padding-.patch
Change-Id: I69cf69f2f361de325f4b39f2d3644ee729643716
Reviewed-on: https://review.monogon.dev/c/monogon/+/2313
Tested-by: Jenkins CI
Reviewed-by: Serge Bazanski <serge@monogon.tech>
diff --git a/cloud/agent/e2e/BUILD.bazel b/cloud/agent/e2e/BUILD.bazel
index 2ed2ad5..fa03d66 100644
--- a/cloud/agent/e2e/BUILD.bazel
+++ b/cloud/agent/e2e/BUILD.bazel
@@ -16,7 +16,7 @@
         "//metropolis/pkg/pki",
         "//metropolis/proto/api",
         "@com_github_cavaliergopher_cpio//:cpio",
-        "@com_github_pierrec_lz4_v4//:lz4",
+        "@com_github_klauspost_compress//zstd",
         "@org_golang_google_grpc//:go_default_library",
         "@org_golang_google_grpc//credentials",
         "@org_golang_google_protobuf//proto",
diff --git a/cloud/agent/e2e/main_test.go b/cloud/agent/e2e/main_test.go
index 27bcd03..277cc55 100644
--- a/cloud/agent/e2e/main_test.go
+++ b/cloud/agent/e2e/main_test.go
@@ -21,7 +21,7 @@
 	"time"
 
 	"github.com/cavaliergopher/cpio"
-	"github.com/pierrec/lz4/v4"
+	"github.com/klauspost/compress/zstd"
 	"golang.org/x/sys/unix"
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/credentials"
@@ -183,7 +183,7 @@
 	if err != nil {
 		t.Fatal(err)
 	}
-	initramfsOrigPath, err := datafile.ResolveRunfile("cloud/agent/initramfs.cpio.lz4")
+	initramfsOrigPath, err := datafile.ResolveRunfile("cloud/agent/initramfs.cpio.zst")
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -207,9 +207,11 @@
 	if err != nil {
 		t.Fatal(err)
 	}
-	compressedOut := lz4.NewWriter(initramfsFile)
-	compressedOut.Apply(lz4.LegacyOption(true))
-	cpioW := cpio.NewWriter(compressedOut)
+	compressedW, err := zstd.NewWriter(initramfsFile, zstd.WithEncoderLevel(1))
+	if err != nil {
+		t.Fatal(err)
+	}
+	cpioW := cpio.NewWriter(compressedW)
 	cpioW.WriteHeader(&cpio.Header{
 		Name: "/init.pb",
 		Size: int64(len(agentInitRaw)),
@@ -217,7 +219,7 @@
 	})
 	cpioW.Write(agentInitRaw)
 	cpioW.Close()
-	compressedOut.Close()
+	compressedW.Close()
 
 	grpcGuestFwd := fmt.Sprintf("guestfwd=tcp:%s-tcp:127.0.0.1:%d", grpcAddr.String(), grpcListenAddr.Port)
 	blobGuestFwd := fmt.Sprintf("guestfwd=tcp:%s-tcp:127.0.0.1:%d", blobAddr.String(), blobListenAddr.Port)
diff --git a/cloud/takeover/BUILD.bazel b/cloud/takeover/BUILD.bazel
index 21b891a..103ea2d 100644
--- a/cloud/takeover/BUILD.bazel
+++ b/cloud/takeover/BUILD.bazel
@@ -19,7 +19,7 @@
         "//net/dump",
         "//net/proto",
         "@com_github_cavaliergopher_cpio//:cpio",
-        "@com_github_pierrec_lz4_v4//:lz4",
+        "@com_github_klauspost_compress//zstd",
         "@org_golang_google_protobuf//proto",
         "@org_golang_x_sys//unix",
     ],
diff --git a/cloud/takeover/takeover.go b/cloud/takeover/takeover.go
index 9de39f5..e265b30 100644
--- a/cloud/takeover/takeover.go
+++ b/cloud/takeover/takeover.go
@@ -27,7 +27,7 @@
 	"time"
 
 	"github.com/cavaliergopher/cpio"
-	"github.com/pierrec/lz4/v4"
+	"github.com/klauspost/compress/zstd"
 	"golang.org/x/sys/unix"
 	"google.golang.org/protobuf/proto"
 
@@ -44,7 +44,7 @@
 //go:embed ucode.cpio
 var ucode []byte
 
-//go:embed cloud/agent/initramfs.cpio.lz4
+//go:embed cloud/agent/initramfs.cpio.zst
 var initramfs []byte
 
 // newMemfile creates a new file which is not located on a specific filesystem,
@@ -124,9 +124,11 @@
 	}
 
 	// Append AgentInit spec to initramfs
-	compressedOut := lz4.NewWriter(initramfsFile)
-	compressedOut.Apply(lz4.LegacyOption(true))
-	cpioW := cpio.NewWriter(compressedOut)
+	compressedW, err := zstd.NewWriter(initramfsFile, zstd.WithEncoderLevel(1))
+	if err != nil {
+		return nil, fmt.Errorf("while creating zstd writer: %w", err)
+	}
+	cpioW := cpio.NewWriter(compressedW)
 	cpioW.WriteHeader(&cpio.Header{
 		Name: "/init.pb",
 		Size: int64(len(agentInitRaw)),
@@ -134,7 +136,7 @@
 	})
 	cpioW.Write(agentInitRaw)
 	cpioW.Close()
-	compressedOut.Close()
+	compressedW.Close()
 
 	agentParams := bootparam.Params{
 		bootparam.Param{Param: "quiet"},
diff --git a/go.mod b/go.mod
index cc88cd0..9de22c8 100644
--- a/go.mod
+++ b/go.mod
@@ -108,7 +108,6 @@
 	github.com/mitchellh/go-wordwrap v1.0.0
 	github.com/opencontainers/runc v1.1.3
 	github.com/packethost/packngo v0.29.0
-	github.com/pierrec/lz4/v4 v4.1.14
 	github.com/pkg/errors v0.9.1
 	github.com/pkg/sftp v1.10.1
 	github.com/prometheus/node_exporter v1.3.1
@@ -310,7 +309,7 @@
 	github.com/josharian/native v1.0.0 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
 	github.com/karrick/godirwalk v1.16.1 // indirect
-	github.com/klauspost/compress v1.14.2 // indirect
+	github.com/klauspost/compress v1.17.2
 	github.com/kr/fs v0.1.0 // indirect
 	github.com/kr/pty v1.1.8 // indirect
 	github.com/libopenstorage/openstorage v1.0.0 // indirect
diff --git a/go.sum b/go.sum
index 929e7c9..264395c 100644
--- a/go.sum
+++ b/go.sum
@@ -1432,8 +1432,9 @@
 github.com/klauspost/compress v1.13.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
 github.com/klauspost/compress v1.13.4/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg=
 github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
-github.com/klauspost/compress v1.14.2 h1:S0OHlFk/Gbon/yauFJ4FfJJF5V0fc5HbBTJazi28pRw=
 github.com/klauspost/compress v1.14.2/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
+github.com/klauspost/compress v1.17.2 h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4=
+github.com/klauspost/compress v1.17.2/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE=
 github.com/klauspost/cpuid v1.2.0/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 github.com/koneu/natend v0.0.0-20150829182554-ec0926ea948d/go.mod h1:QHb4k4cr1fQikUahfcRVPcEXiUgFsdIstGqlurL0XL4=
 github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
@@ -1824,7 +1825,6 @@
 github.com/pierrec/lz4 v2.6.1+incompatible h1:9UY3+iC23yxF0UfGaYrGplQ+79Rg+h/q9FV9ix19jjM=
 github.com/pierrec/lz4 v2.6.1+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY=
 github.com/pierrec/lz4/v4 v4.1.8/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
-github.com/pierrec/lz4/v4 v4.1.14 h1:+fL8AQEZtz/ijeNnpduH0bROTu0O3NZAlPjQxGn8LwE=
 github.com/pierrec/lz4/v4 v4.1.14/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4=
 github.com/pingcap/errors v0.11.0/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8=
 github.com/pingcap/errors v0.11.5-0.20210425183316-da1aaba5fb63 h1:+FZIDR/D97YOPik4N4lPDaUcLDF/EQPogxtlHB2ZZRM=
diff --git a/metropolis/node/build/def.bzl b/metropolis/node/build/def.bzl
index d71279c..4f0e30d 100644
--- a/metropolis/node/build/def.bzl
+++ b/metropolis/node/build/def.bzl
@@ -133,7 +133,7 @@
     return
 
 def _node_initramfs_impl(ctx):
-    initramfs_name = ctx.label.name + ".cpio.lz4"
+    initramfs_name = ctx.label.name + ".cpio.zst"
     initramfs = ctx.actions.declare_file(initramfs_name)
 
     _fsspec_core_impl(ctx, ctx.executable._mkcpio, initramfs)
diff --git a/metropolis/node/build/mkcpio/BUILD.bazel b/metropolis/node/build/mkcpio/BUILD.bazel
index f0bb779..3ea98ae 100644
--- a/metropolis/node/build/mkcpio/BUILD.bazel
+++ b/metropolis/node/build/mkcpio/BUILD.bazel
@@ -8,7 +8,7 @@
     deps = [
         "//metropolis/node/build/fsspec",
         "@com_github_cavaliergopher_cpio//:cpio",
-        "@com_github_pierrec_lz4_v4//:lz4",
+        "@com_github_klauspost_compress//zstd",
         "@org_golang_x_sys//unix",
     ],
 )
diff --git a/metropolis/node/build/mkcpio/main.go b/metropolis/node/build/mkcpio/main.go
index 1c9b39f..b8f99b9 100644
--- a/metropolis/node/build/mkcpio/main.go
+++ b/metropolis/node/build/mkcpio/main.go
@@ -10,7 +10,7 @@
 	"strings"
 
 	"github.com/cavaliergopher/cpio"
-	"github.com/pierrec/lz4/v4"
+	"github.com/klauspost/compress/zstd"
 	"golang.org/x/sys/unix"
 
 	"source.monogon.dev/metropolis/node/build/fsspec"
@@ -47,7 +47,7 @@
 	Inode interface{}
 }
 
-// Usage: -out <out-path.cpio.lz4> fsspec-path...
+// Usage: -out <out-path.cpio.zst> fsspec-path...
 func main() {
 	flag.Parse()
 	outFile, err := os.Create(*outPath)
@@ -55,8 +55,10 @@
 		log.Fatalf("Failed to open CPIO output file: %v", err)
 	}
 	defer outFile.Close()
-	compressedOut := lz4.NewWriter(outFile)
-	compressedOut.Apply(lz4.LegacyOption(true))
+	compressedOut, err := zstd.NewWriter(outFile)
+	if err != nil {
+		log.Fatalf("While initializing zstd writer: %v", err)
+	}
 	defer compressedOut.Close()
 	cpioWriter := cpio.NewWriter(compressedOut)
 	defer cpioWriter.Close()
@@ -168,7 +170,7 @@
 			}); err != nil {
 				log.Fatalf("Failed to write cpio header for file %q: %v", i.Path, err)
 			}
-			if _, err := io.Copy(cpioWriter, inF); err != nil {
+			if n, err := io.Copy(cpioWriter, inF); err != nil || n != inFStat.Size() {
 				log.Fatalf("Failed to copy file %q into cpio: %v", i.SourcePath, err)
 			}
 			inF.Close()
diff --git a/metropolis/test/launch/cluster/cluster.go b/metropolis/test/launch/cluster/cluster.go
index a312e86..1066d4d 100644
--- a/metropolis/test/launch/cluster/cluster.go
+++ b/metropolis/test/launch/cluster/cluster.go
@@ -790,7 +790,7 @@
 		if err := launch.RunMicroVM(ctxT, &launch.MicroVMOptions{
 			Name:                   "nanoswitch",
 			KernelPath:             "metropolis/test/ktest/vmlinux",
-			InitramfsPath:          "metropolis/test/nanoswitch/initramfs.cpio.lz4",
+			InitramfsPath:          "metropolis/test/nanoswitch/initramfs.cpio.zst",
 			ExtraNetworkInterfaces: switchPorts,
 			PortMap:                portMap,
 			GuestServiceMap:        guestSvcMap,
diff --git a/third_party/go/repositories.bzl b/third_party/go/repositories.bzl
index abfdfe4..2811cfb 100644
--- a/third_party/go/repositories.bzl
+++ b/third_party/go/repositories.bzl
@@ -3180,8 +3180,8 @@
     go_repository(
         name = "com_github_klauspost_compress",
         importpath = "github.com/klauspost/compress",
-        sum = "h1:S0OHlFk/Gbon/yauFJ4FfJJF5V0fc5HbBTJazi28pRw=",
-        version = "v1.14.2",
+        sum = "h1:RlWWUY/Dr4fL8qk9YG7DTZ7PDgME2V4csBXA8L/ixi4=",
+        version = "v1.17.2",
     )
     go_repository(
         name = "com_github_klauspost_cpuid",