c/bmaas/bmdb: implement OS installation flow
This adds two new tags: OSInstallationRequest and
OSInstallationResponse. It also implements interacting with these tags
from the agent side.
This doesn't yet implement any admin/user-facing API to actually request
OS installation; for now we just exercise this in tests.
Change-Id: I2e31a8369a3a8670bb92bcacfb8231a0d5e1b9fd
Reviewed-on: https://review.monogon.dev/c/monogon/+/1011
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
Tested-by: Jenkins CI
diff --git a/cloud/bmaas/server/BUILD.bazel b/cloud/bmaas/server/BUILD.bazel
index ee491d1..2c96a03 100644
--- a/cloud/bmaas/server/BUILD.bazel
+++ b/cloud/bmaas/server/BUILD.bazel
@@ -40,5 +40,6 @@
"//metropolis/node/core/rpc",
"@com_github_google_uuid//:uuid",
"@org_golang_google_grpc//:go_default_library",
+ "@org_golang_google_protobuf//proto",
],
)
diff --git a/cloud/bmaas/server/agent_callback_service.go b/cloud/bmaas/server/agent_callback_service.go
index f058213..b6e0e71 100644
--- a/cloud/bmaas/server/agent_callback_service.go
+++ b/cloud/bmaas/server/agent_callback_service.go
@@ -3,6 +3,7 @@
import (
"context"
"crypto/ed25519"
+ "encoding/hex"
"errors"
"fmt"
"time"
@@ -57,6 +58,7 @@
return fmt.Errorf("AuthenticateAgentConnection: %w", err)
}
if len(agents) < 1 {
+ klog.Errorf("No agent for %s/%s", machineId.String(), hex.EncodeToString(pk))
return errAgentUnauthenticated
}
return nil
@@ -76,7 +78,7 @@
if req.HardwareReport != nil {
hwraw, err = proto.Marshal(req.HardwareReport)
if err != nil {
- return nil, status.Errorf(codes.InvalidArgument, "could not serialize harcware report: %v", err)
+ return nil, status.Errorf(codes.InvalidArgument, "could not serialize hardware report: %v", err)
}
}
@@ -92,6 +94,13 @@
return fmt.Errorf("hardware report upsert: %w", err)
}
}
+ // Upsert OS installation report if submitted.
+ if req.InstallationReport != nil {
+ err = q.MachineSetOSInstallationReport(ctx, model.MachineSetOSInstallationReportParams{
+ MachineID: machineId,
+ Generation: req.InstallationReport.Generation,
+ })
+ }
return q.MachineSetAgentHeartbeat(ctx, model.MachineSetAgentHeartbeatParams{
MachineID: machineId,
AgentHeartbeatAt: time.Now(),
@@ -101,6 +110,35 @@
klog.Errorf("Could not submit heartbeat: %v", err)
return nil, status.Error(codes.Unavailable, "could not submit heartbeat")
}
+ klog.Infof("Heartbeat from %s/%s", machineId.String(), hex.EncodeToString(pk))
- return &apb.AgentHeartbeatResponse{}, nil
+ // Get installation request for machine if present.
+ var installRequest *apb.OSInstallationRequest
+ err = session.Transact(ctx, func(q *model.Queries) error {
+ reqs, err := q.GetExactMachineForOSInstallation(ctx, model.GetExactMachineForOSInstallationParams{
+ MachineID: machineId,
+ Limit: 1,
+ })
+ if err != nil {
+ return fmt.Errorf("GetExactMachineForOSInstallation: %w", err)
+ }
+ if len(reqs) > 0 {
+ raw := reqs[0].OsInstallationRequestRaw
+ var preq apb.OSInstallationRequest
+ if err := proto.Unmarshal(raw, &preq); err != nil {
+ return fmt.Errorf("could not decode stored OS installation request: %w", err)
+ }
+ installRequest = &preq
+ }
+ return nil
+ })
+ if err != nil {
+ // Do not fail entire request. Instead, just log an error.
+ // TODO(q3k): alert on this
+ klog.Errorf("Failure during OS installation request retrieval: %v", err)
+ }
+
+ return &apb.AgentHeartbeatResponse{
+ InstallationRequest: installRequest,
+ }, nil
}
diff --git a/cloud/bmaas/server/agent_callback_service_test.go b/cloud/bmaas/server/agent_callback_service_test.go
index bc3201a..320bb68 100644
--- a/cloud/bmaas/server/agent_callback_service_test.go
+++ b/cloud/bmaas/server/agent_callback_service_test.go
@@ -9,6 +9,7 @@
"github.com/google/uuid"
"google.golang.org/grpc"
+ "google.golang.org/protobuf/proto"
"source.monogon.dev/cloud/bmaas/bmdb"
"source.monogon.dev/cloud/bmaas/bmdb/model"
@@ -114,3 +115,138 @@
// TODO(q3k): test hardware report being attached once we have some debug API
// for tags.
}
+
+// TestOSInstallationFlow exercises the agent's OS installation request/report
+// functionality.
+func TestOSInstallationFlow(t *testing.T) {
+ s := dut()
+ ctx, ctxC := context.WithCancel(context.Background())
+ defer ctxC()
+ s.Start(ctx)
+
+ pub, priv, err := ed25519.GenerateKey(rand.Reader)
+ if err != nil {
+ t.Fatalf("could not generate keypair: %v", err)
+ }
+
+ sess, err := s.bmdb.StartSession(ctx)
+ if err != nil {
+ t.Fatalf("could not start session")
+ }
+
+ heartbeat := func(mid uuid.UUID, report *apb.OSInstallationReport) (*apb.AgentHeartbeatResponse, error) {
+ creds, err := rpc.NewEphemeralCredentials(priv, nil)
+ if err != nil {
+ t.Fatalf("could not generate ephemeral credentials: %v", err)
+ }
+ conn, err := grpc.Dial(s.ListenPublic, grpc.WithTransportCredentials(creds))
+ if err != nil {
+ t.Fatalf("Dial failed: %v", err)
+ }
+ defer conn.Close()
+
+ stub := apb.NewAgentCallbackClient(conn)
+ return stub.Heartbeat(ctx, &apb.AgentHeartbeatRequest{
+ MachineId: mid.String(),
+ HardwareReport: &apb.AgentHardwareReport{},
+ InstallationReport: report,
+ })
+ }
+
+ // Create machine with no OS installation request.
+ var machine model.Machine
+ err = sess.Transact(ctx, func(q *model.Queries) error {
+ machine, err = q.NewMachine(ctx)
+ if err != nil {
+ return err
+ }
+ err = q.MachineAddProvided(ctx, model.MachineAddProvidedParams{
+ MachineID: machine.MachineID,
+ Provider: model.ProviderEquinix,
+ ProviderID: "123",
+ })
+ if err != nil {
+ return err
+ }
+ return q.MachineSetAgentStarted(ctx, model.MachineSetAgentStartedParams{
+ MachineID: machine.MachineID,
+ AgentStartedAt: time.Now(),
+ AgentPublicKey: pub,
+ })
+ })
+ if err != nil {
+ t.Fatalf("could not create machine: %v", err)
+ }
+
+ // Expect successful heartbeat, but no OS installation request.
+ hbr, err := heartbeat(machine.MachineID, nil)
+ if err != nil {
+ t.Fatalf("heartbeat: %v", err)
+ }
+ if hbr.InstallationRequest != nil {
+ t.Fatalf("expected no installation request")
+ }
+
+ // Now add an OS installation request tag, and expect it to be returned.
+ err = sess.Transact(ctx, func(q *model.Queries) error {
+ req := apb.OSInstallationRequest{
+ Generation: 123,
+ }
+ raw, _ := proto.Marshal(&req)
+ return q.MachineSetOSInstallationRequest(ctx, model.MachineSetOSInstallationRequestParams{
+ MachineID: machine.MachineID,
+ Generation: req.Generation,
+ OsInstallationRequestRaw: raw,
+ })
+ })
+ if err != nil {
+ t.Fatalf("could not add os installation request to machine: %v", err)
+ }
+
+ // Heartbeat a few times just to make sure every response is as expected.
+ for i := 0; i < 3; i++ {
+ hbr, err = heartbeat(machine.MachineID, nil)
+ if err != nil {
+ t.Fatalf("heartbeat: %v", err)
+ }
+ if hbr.InstallationRequest == nil || hbr.InstallationRequest.Generation != 123 {
+ t.Fatalf("expected installation request for generation 123, got %+v", hbr.InstallationRequest)
+ }
+ }
+
+ // Submit a report, expect no more request.
+ hbr, err = heartbeat(machine.MachineID, &apb.OSInstallationReport{Generation: 123})
+ if err != nil {
+ t.Fatalf("heartbeat: %v", err)
+ }
+ if hbr.InstallationRequest != nil {
+ t.Fatalf("expected no installation request")
+ }
+
+ // Submit a newer request, expect it to be returned.
+ err = sess.Transact(ctx, func(q *model.Queries) error {
+ req := apb.OSInstallationRequest{
+ Generation: 234,
+ }
+ raw, _ := proto.Marshal(&req)
+ return q.MachineSetOSInstallationRequest(ctx, model.MachineSetOSInstallationRequestParams{
+ MachineID: machine.MachineID,
+ Generation: req.Generation,
+ OsInstallationRequestRaw: raw,
+ })
+ })
+ if err != nil {
+ t.Fatalf("could not update installation request: %v", err)
+ }
+
+ // Heartbeat a few times just to make sure every response is as expected.
+ for i := 0; i < 3; i++ {
+ hbr, err = heartbeat(machine.MachineID, nil)
+ if err != nil {
+ t.Fatalf("heartbeat: %v", err)
+ }
+ if hbr.InstallationRequest == nil || hbr.InstallationRequest.Generation != 234 {
+ t.Fatalf("expected installation request for generation 234, got %+v", hbr.InstallationRequest)
+ }
+ }
+}
diff --git a/cloud/bmaas/server/api/agent.proto b/cloud/bmaas/server/api/agent.proto
index c08c767..0ed29c3 100644
--- a/cloud/bmaas/server/api/agent.proto
+++ b/cloud/bmaas/server/api/agent.proto
@@ -21,6 +21,14 @@
// TODO(lorenz): implement
}
+// OSInstallationReport is submitted from the agent to the BMDB server after
+// successful OS installation.
+message OSInstallationReport {
+ // generation must be set to the same value as 'generation' in the
+ // OSInstallationRequest which triggered the OS installation.
+ int64 generation = 1;
+}
+
message AgentHeartbeatRequest {
// MachineID that this agent represents. Technically not necessary since
// keypairs between agents should be unique, but this provides an extra layer
@@ -29,8 +37,26 @@
// Optional hardware report to be upserted for this machine. An agent should
// submit one at least once after it's started, as early as it can.
AgentHardwareReport hardware_report = 2;
+ // Optional installation report to be upserted for this machine. An agent
+ // should submit one after it successfully installed an operating system for
+ // a given OSInstallationRequest.
+ OSInstallationReport installation_report = 3;
+}
+
+// OSInstallationRequest is provided to the agent by the BMDB server, from
+// a corresponding BMDB tag, when an OS installation request is pending.
+message OSInstallationRequest {
+ // generation is the 'version' of the OS installation request, and will always
+ // be incremented within the BMDB when a new OS installation request is
+ // submitted. The agent must pipe this through to the OSInstallationReport to
+ // let the rest of the system know which OS installation request it actually
+ // fulfilled.
+ int64 generation = 1;
+ // TODO(lorenz): implement
}
message AgentHeartbeatResponse {
- // Agent actions (like install, reboot, etc) go here.
+ // If set, the control plane is requesting the installation of an operating
+ // system.
+ OSInstallationRequest installation_request = 1;
}
\ No newline at end of file