cloud/agent: use new OS image format for install
This switches the Agent installation method to the new OS image format
based on OCI artifacts. OS images are now fetched from an OCI registry.
Change-Id: Icd59a2c808fd607b95d8aaa8e60022a27fd2d091
Reviewed-on: https://review.monogon.dev/c/monogon/+/4091
Reviewed-by: Tim Windelschmidt <tim@monogon.tech>
Tested-by: Jenkins CI
diff --git a/cloud/agent/BUILD.bazel b/cloud/agent/BUILD.bazel
index 3d342ae..b02f994 100644
--- a/cloud/agent/BUILD.bazel
+++ b/cloud/agent/BUILD.bazel
@@ -15,7 +15,6 @@
deps = [
"//cloud/agent/api",
"//cloud/bmaas/server/api",
- "//go/logging",
"//metropolis/node/core/devmgr",
"//metropolis/node/core/network",
"//osbase/blockdev",
@@ -24,6 +23,8 @@
"//osbase/efivarfs",
"//osbase/net/proto",
"//osbase/nvme",
+ "//osbase/oci/osimage",
+ "//osbase/oci/registry",
"//osbase/pki",
"//osbase/scsi",
"//osbase/smbios",
diff --git a/cloud/agent/agent.go b/cloud/agent/agent.go
index c185b83..05b2daf 100644
--- a/cloud/agent/agent.go
+++ b/cloud/agent/agent.go
@@ -170,7 +170,8 @@
installationReport = &bpb.OSInstallationReport{
Generation: res.InstallationRequest.Generation,
}
- if err := install(res.InstallationRequest, agentInit.NetworkConfig, l); err != nil {
+ installCtx, cancel := context.WithTimeout(ctx, 15*time.Minute)
+ if err := install(installCtx, res.InstallationRequest, agentInit.NetworkConfig); err != nil {
l.Errorf("Installation failed: %v", err)
installationReport.Result = &bpb.OSInstallationReport_Error_{
Error: &bpb.OSInstallationReport_Error{
@@ -183,6 +184,7 @@
Success: &bpb.OSInstallationReport_Success{},
}
}
+ cancel()
} else {
time.Sleep(30 * time.Second)
}
diff --git a/cloud/agent/e2e/BUILD.bazel b/cloud/agent/e2e/BUILD.bazel
index c859c2b..7df7ad6 100644
--- a/cloud/agent/e2e/BUILD.bazel
+++ b/cloud/agent/e2e/BUILD.bazel
@@ -5,13 +5,13 @@
srcs = ["main_test.go"],
data = [
"//cloud/agent/takeover:initramfs",
- "//metropolis/installer/test/testos:testos_bundle",
+ "//metropolis/installer/test/testos:testos_image",
"//third_party/edk2:OVMF_CODE.fd",
"//third_party/edk2:OVMF_VARS.fd",
"//third_party/linux",
],
x_defs = {
- "xBundleFilePath": "$(rlocationpath //metropolis/installer/test/testos:testos_bundle )",
+ "xImagePath": "$(rlocationpath //metropolis/installer/test/testos:testos_image )",
"xOvmfVarsPath": "$(rlocationpath //third_party/edk2:OVMF_VARS.fd )",
"xOvmfCodePath": "$(rlocationpath //third_party/edk2:OVMF_CODE.fd )",
"xKernelPath": "$(rlocationpath //third_party/linux )",
@@ -21,6 +21,8 @@
"//cloud/agent/api",
"//cloud/bmaas/server/api",
"//metropolis/proto/api",
+ "//osbase/oci",
+ "//osbase/oci/registry",
"//osbase/pki",
"@com_github_cavaliergopher_cpio//:cpio",
"@com_github_klauspost_compress//zstd",
diff --git a/cloud/agent/e2e/main_test.go b/cloud/agent/e2e/main_test.go
index bcc38f3..8dbce49 100644
--- a/cloud/agent/e2e/main_test.go
+++ b/cloud/agent/e2e/main_test.go
@@ -16,7 +16,6 @@
"math/big"
"net"
"net/http"
- "net/url"
"os"
"os/exec"
"strings"
@@ -34,6 +33,8 @@
apb "source.monogon.dev/cloud/agent/api"
bpb "source.monogon.dev/cloud/bmaas/server/api"
mpb "source.monogon.dev/metropolis/proto/api"
+ "source.monogon.dev/osbase/oci"
+ "source.monogon.dev/osbase/oci/registry"
"source.monogon.dev/osbase/pki"
)
@@ -41,7 +42,7 @@
// These are filled by bazel at linking time with the canonical path of
// their corresponding file. Inside the init function we resolve it
// with the rules_go runfiles package to the real path.
- xBundleFilePath string
+ xImagePath string
xOvmfVarsPath string
xOvmfCodePath string
xKernelPath string
@@ -51,7 +52,7 @@
func init() {
var err error
for _, path := range []*string{
- &xBundleFilePath, &xOvmfVarsPath, &xOvmfCodePath,
+ &xImagePath, &xOvmfVarsPath, &xOvmfCodePath,
&xKernelPath, &xInitramfsOrigPath,
} {
*path, err = runfiles.Rlocation(*path)
@@ -95,15 +96,26 @@
Port: 3000,
}
- blobAddr := net.TCPAddr{
+ registryAddr := net.TCPAddr{
IP: net.IPv4(10, 42, 0, 6),
Port: 80,
}
+ image, err := oci.ReadLayout(xImagePath)
+ if err != nil {
+ t.Fatal(err)
+ }
+
f.installationRequest = &bpb.OSInstallationRequest{
Generation: 5,
Type: &bpb.OSInstallationRequest_Metropolis{Metropolis: &bpb.MetropolisInstallationRequest{
- BundleUrl: (&url.URL{Scheme: "http", Host: blobAddr.String(), Path: "/bundle.bin"}).String(),
+ OsImage: &mpb.OSImageRef{
+ Scheme: "http",
+ Host: registryAddr.String(),
+ Repository: "testos",
+ Tag: "latest",
+ Digest: image.ManifestDigest,
+ },
NodeParameters: &mpb.NodeParameters{},
RootDevice: "vda",
}},
@@ -165,18 +177,16 @@
go s.Serve(grpcLis)
grpcListenAddr := grpcLis.Addr().(*net.TCPAddr)
- m := http.NewServeMux()
+ registryServer := registry.NewServer()
+ registryServer.AddImage("testos", "latest", image)
- m.HandleFunc("/bundle.bin", func(w http.ResponseWriter, req *http.Request) {
- http.ServeFile(w, req, xBundleFilePath)
- })
- blobLis, err := net.Listen("tcp", "127.0.0.1:0")
+ registryLis, err := net.Listen("tcp", "127.0.0.1:0")
if err != nil {
t.Fatal(err)
}
- blobListenAddr := blobLis.Addr().(*net.TCPAddr)
- go http.Serve(blobLis, m)
+ registryListenAddr := registryLis.Addr().(*net.TCPAddr)
+ go http.Serve(registryLis, registryServer)
_, privateKey, err := ed25519.GenerateKey(rand.Reader)
if err != nil {
@@ -237,7 +247,7 @@
compressedW.Close()
grpcGuestFwd := fmt.Sprintf("guestfwd=tcp:%s-tcp:127.0.0.1:%d", grpcAddr.String(), grpcListenAddr.Port)
- blobGuestFwd := fmt.Sprintf("guestfwd=tcp:%s-tcp:127.0.0.1:%d", blobAddr.String(), blobListenAddr.Port)
+ registryGuestFwd := fmt.Sprintf("guestfwd=tcp:%s-tcp:127.0.0.1:%d", registryAddr.String(), registryListenAddr.Port)
ovmfVars, err := os.CreateTemp("", "agent-ovmf-vars")
if err != nil {
@@ -257,7 +267,7 @@
"-drive", "if=pflash,format=raw,readonly=on,file=" + xOvmfCodePath,
"-drive", "if=pflash,format=raw,file=" + ovmfVars.Name(),
"-drive", "if=virtio,format=raw,cache=unsafe,file=" + rootDisk.Name(),
- "-netdev", fmt.Sprintf("user,id=net0,net=10.42.0.0/24,dhcpstart=10.42.0.10,%s,%s", grpcGuestFwd, blobGuestFwd),
+ "-netdev", fmt.Sprintf("user,id=net0,net=10.42.0.0/24,dhcpstart=10.42.0.10,%s,%s", grpcGuestFwd, registryGuestFwd),
"-device", "virtio-net-pci,netdev=net0,mac=22:d5:8e:76:1d:07",
"-device", "virtio-rng-pci",
"-serial", "stdio",
diff --git a/cloud/agent/install.go b/cloud/agent/install.go
index 914b0be..0693595 100644
--- a/cloud/agent/install.go
+++ b/cloud/agent/install.go
@@ -4,60 +4,45 @@
package main
import (
- "archive/zip"
- "bytes"
+ "context"
_ "embed"
"errors"
"fmt"
- "net/http"
"os"
"path/filepath"
+ "time"
"github.com/cenkalti/backoff/v4"
"google.golang.org/protobuf/proto"
bpb "source.monogon.dev/cloud/bmaas/server/api"
- "source.monogon.dev/go/logging"
+ npb "source.monogon.dev/osbase/net/proto"
+
"source.monogon.dev/osbase/blockdev"
"source.monogon.dev/osbase/build/mkimage/osimage"
"source.monogon.dev/osbase/efivarfs"
- npb "source.monogon.dev/osbase/net/proto"
+ ociosimage "source.monogon.dev/osbase/oci/osimage"
+ "source.monogon.dev/osbase/oci/registry"
"source.monogon.dev/osbase/structfs"
+ "source.monogon.dev/osbase/supervisor"
)
//go:embed metropolis/node/core/abloader/abloader.efi
var abloader []byte
-// zipBlob looks up a file in a [zip.Reader] and adapts it to [structfs.Blob].
-func zipBlob(reader *zip.Reader, name string) (zipFileBlob, error) {
- for _, file := range reader.File {
- if file.Name == name {
- return zipFileBlob{file}, nil
- }
- }
- return zipFileBlob{}, fmt.Errorf("file %q not found", name)
-}
-
-type zipFileBlob struct {
- *zip.File
-}
-
-func (f zipFileBlob) Size() int64 {
- return int64(f.File.UncompressedSize64)
-}
-
// install dispatches OSInstallationRequests to the appropriate installer
// method
-func install(req *bpb.OSInstallationRequest, netConfig *npb.Net, l logging.Leveled) error {
+func install(ctx context.Context, req *bpb.OSInstallationRequest, netConfig *npb.Net) error {
switch reqT := req.Type.(type) {
case *bpb.OSInstallationRequest_Metropolis:
- return installMetropolis(reqT.Metropolis, netConfig, l)
+ return installMetropolis(ctx, reqT.Metropolis, netConfig)
default:
return errors.New("unknown installation request type")
}
}
-func installMetropolis(req *bpb.MetropolisInstallationRequest, netConfig *npb.Net, l logging.Leveled) error {
+func installMetropolis(ctx context.Context, req *bpb.MetropolisInstallationRequest, netConfig *npb.Net) error {
+ l := supervisor.Logger(ctx)
// Validate we are running via EFI.
if _, err := os.Stat("/sys/firmware/efi"); os.IsNotExist(err) {
// nolint:ST1005
@@ -70,63 +55,47 @@
req.NodeParameters.NetworkConfig = netConfig
}
- // Download into a buffer as ZIP files cannot efficiently be read from
- // HTTP in Go as the ReaderAt has no way of indicating continuous sections,
- // thus a ton of small range requests would need to be used, causing
- // a huge latency penalty as well as costing a lot of money on typical
- // object storages. This should go away when we switch to a better bundle
- // format which can be streamed.
- var bundleRaw bytes.Buffer
- b := backoff.NewExponentialBackOff()
- err := backoff.Retry(func() error {
- bundleRes, err := http.Get(req.BundleUrl)
- if err != nil {
- l.Warningf("Metropolis bundle request failed: %v", err)
- return fmt.Errorf("HTTP request failed: %w", err)
- }
- defer bundleRes.Body.Close()
- switch bundleRes.StatusCode {
- case http.StatusTooEarly, http.StatusTooManyRequests,
- http.StatusInternalServerError, http.StatusBadGateway,
- http.StatusServiceUnavailable, http.StatusGatewayTimeout:
- l.Warningf("Metropolis bundle request HTTP %d error, retrying", bundleRes.StatusCode)
- return fmt.Errorf("HTTP error %d", bundleRes.StatusCode)
- default:
- // Non-standard code range used for proxy-related issue by various
- // vendors. Treat as non-permanent error.
- if bundleRes.StatusCode >= 520 && bundleRes.StatusCode < 599 {
- l.Warningf("Metropolis bundle request HTTP %d error, retrying", bundleRes.StatusCode)
- return fmt.Errorf("HTTP error %d", bundleRes.StatusCode)
- }
- if bundleRes.StatusCode != 200 {
- l.Errorf("Metropolis bundle request permanent HTTP %d error, aborting", bundleRes.StatusCode)
- return backoff.Permanent(fmt.Errorf("HTTP error %d", bundleRes.StatusCode))
- }
- }
- if _, err := bundleRaw.ReadFrom(bundleRes.Body); err != nil {
- l.Warningf("Metropolis bundle download failed, retrying: %v", err)
- bundleRaw.Reset()
- return err
- }
- return nil
- }, b)
- if err != nil {
- return fmt.Errorf("error downloading Metropolis bundle: %w", err)
+ if req.OsImage == nil {
+ return fmt.Errorf("missing OS image in OS installation request")
}
- l.Info("Metropolis Bundle downloaded")
- bundle, err := zip.NewReader(bytes.NewReader(bundleRaw.Bytes()), int64(bundleRaw.Len()))
- if err != nil {
- return fmt.Errorf("failed to open node bundle: %w", err)
+ if req.OsImage.Digest == "" {
+ return fmt.Errorf("missing digest in OS installation request")
}
- efiPayload, err := zipBlob(bundle, "kernel_efi.efi")
- if err != nil {
- return fmt.Errorf("invalid bundle: %w", err)
+
+ client := &registry.Client{
+ GetBackOff: func() backoff.BackOff {
+ return backoff.NewExponentialBackOff()
+ },
+ RetryNotify: func(err error, d time.Duration) {
+ l.Warningf("Error while fetching OS image, retrying in %v: %v", d, err)
+ },
+ UserAgent: "Monogon-Cloud-Agent",
+ Scheme: req.OsImage.Scheme,
+ Host: req.OsImage.Host,
+ Repository: req.OsImage.Repository,
}
- systemImage, err := zipBlob(bundle, "verity_rootfs.img")
+
+ image, err := client.Read(ctx, req.OsImage.Tag, req.OsImage.Digest)
if err != nil {
- return fmt.Errorf("invalid bundle: %w", err)
+ return fmt.Errorf("failed to fetch OS image: %w", err)
}
+ osImage, err := ociosimage.Read(image)
+ if err != nil {
+ return fmt.Errorf("failed to fetch OS image: %w", err)
+ }
+
+ efiPayload, err := osImage.Payload("kernel.efi")
+ if err != nil {
+ return fmt.Errorf("cannot open EFI payload in OS image: %w", err)
+ }
+ systemImage, err := osImage.Payload("system")
+ if err != nil {
+ return fmt.Errorf("cannot open system image in OS image: %w", err)
+ }
+
+ l.Info("OS image config downloaded")
+
nodeParamsRaw, err := proto.Marshal(req.NodeParameters)
if err != nil {
return fmt.Errorf("failed marshaling: %w", err)
diff --git a/cloud/bmaas/server/api/agent.proto b/cloud/bmaas/server/api/agent.proto
index 67ec4a3..3b3c5b2 100644
--- a/cloud/bmaas/server/api/agent.proto
+++ b/cloud/bmaas/server/api/agent.proto
@@ -3,6 +3,7 @@
option go_package = "source.monogon.dev/cloud/bmaas/server/api";
import "metropolis/proto/api/configuration.proto";
+import "metropolis/proto/api/management.proto";
import "cloud/agent/api/hwreport.proto";
// AgentCallback runs on the BMDB Server and exposes a gRPC interface to agents
@@ -65,8 +66,9 @@
}
message MetropolisInstallationRequest {
- // An HTTPS URL to a Metropolis bundle containing the OS to install.
- string bundle_url = 1;
+ reserved 1;
+ // Parameters for fetching the OS image to install.
+ metropolis.proto.api.OSImageRef os_image = 4;
// Node parameters to be supplied to the new node. Note that network_config
// is automatically filled out if coming from the takeover.
metropolis.proto.api.NodeParameters node_parameters = 2;
@@ -94,4 +96,4 @@
// If set, the control plane is requesting the installation of an operating
// system.
OSInstallationRequest installation_request = 1;
-}
\ No newline at end of file
+}
diff --git a/metropolis/installer/test/testos/BUILD.bazel b/metropolis/installer/test/testos/BUILD.bazel
index e8d8700..34f346b 100644
--- a/metropolis/installer/test/testos/BUILD.bazel
+++ b/metropolis/installer/test/testos/BUILD.bazel
@@ -1,5 +1,4 @@
load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library")
-load("@rules_pkg//:pkg.bzl", "pkg_zip")
load("//osbase/build/mkerofs:def.bzl", "erofs_image")
load("//osbase/build/mkoci:def.bzl", "oci_os_image")
load("//osbase/build/mkpayload:def.bzl", "efi_unified_kernel_image")
@@ -24,18 +23,6 @@
verity = ":verity_rootfs",
)
-# An intermediary "bundle" format until we finalize the actual bundle format. This is NOT stable until migrated
-# to the actual bundle format.
-# TODO(lorenz): Replace this
-pkg_zip(
- name = "testos_bundle",
- srcs = [
- ":kernel_efi",
- ":verity_rootfs",
- ],
- visibility = ["//visibility:public"],
-)
-
oci_os_image(
name = "testos_image",
srcs = {
diff --git a/metropolis/proto/api/management.proto b/metropolis/proto/api/management.proto
index 9a81082..81d4ca0 100644
--- a/metropolis/proto/api/management.proto
+++ b/metropolis/proto/api/management.proto
@@ -481,6 +481,21 @@
ActivationMode activation_mode = 3;
}
+// OSImageRef contains the parameters for fetching an OS image from an OCI
+// registry.
+message OSImageRef {
+ // Scheme must be either http or https.
+ string scheme = 1;
+ // Host with optional port.
+ string host = 2;
+ // Repository containing the image.
+ string repository = 3;
+ // Tag is optional. If not set, the manifest is fetched by digest instead.
+ string tag = 4;
+ // Digest is required. It is used to verify the manifest.
+ string digest = 5;
+}
+
message UpdateNodeResponse {}
message UpdateNodeLabelsRequest {
@@ -538,4 +553,4 @@
message ConfigureClusterResponse {
// Resulting config as set on the server, merged from the users new_config.
common.ClusterConfiguration resulting_config = 1;
-}
\ No newline at end of file
+}