cloud: split shepherd up
Change-Id: I8e386d9eaaf17543743e1e8a37a8d71426910d59
Reviewed-on: https://review.monogon.dev/c/monogon/+/2213
Reviewed-by: Serge Bazanski <serge@monogon.tech>
Tested-by: Jenkins CI
diff --git a/cloud/shepherd/provider/equinix/recoverer_test.go b/cloud/shepherd/provider/equinix/recoverer_test.go
new file mode 100644
index 0000000..109c375
--- /dev/null
+++ b/cloud/shepherd/provider/equinix/recoverer_test.go
@@ -0,0 +1,181 @@
+package main
+
+import (
+ "context"
+ "crypto/ed25519"
+ "crypto/rand"
+ "testing"
+ "time"
+
+ "github.com/packethost/packngo"
+ "golang.org/x/time/rate"
+
+ "source.monogon.dev/cloud/bmaas/bmdb"
+ "source.monogon.dev/cloud/bmaas/bmdb/model"
+ "source.monogon.dev/cloud/lib/component"
+ "source.monogon.dev/cloud/shepherd/manager"
+)
+
+type recovererDut struct { // recovererDut bundles what newRecovererDut sets up for a test.
+ f *fakequinix // fake Equinix backend that the provider is created with
+ r *manager.Recoverer // Recoverer driven by the background control loop
+ bmdb *bmdb.Connection // connection to the in-memory BMDB
+ ctx context.Context // cancelled through t.Cleanup of the owning test
+}
+
+// newRecovererDut builds the fixture for a Recoverer test: a provider backed by
+// a fakequinix, a Recoverer using that provider, and an in-memory BMDB. It
+// starts the Recoverer's control loop in a goroutine whose context is cancelled
+// via t.Cleanup. Any setup failure aborts the test through t.Fatalf.
+func newRecovererDut(t *testing.T) *recovererDut {
+	t.Helper()
+
+	rc := manager.RecovererConfig{
+		ControlLoopConfig: manager.ControlLoopConfig{
+			DBQueryLimiter: rate.NewLimiter(rate.Every(time.Second), 10),
+		},
+	}
+
+	sc := providerConfig{
+		ProjectId:    "noproject",
+		KeyLabel:     "somekey",
+		DevicePrefix: "test-",
+	}
+
+	// Generate a throwaway key to hand to the provider via sc.New.
+	_, key, err := ed25519.GenerateKey(rand.Reader)
+	if err != nil {
+		t.Fatalf("Could not generate SSH key: %v", err)
+	}
+	k := manager.SSHKey{Key: key}
+
+	f := newFakequinix(sc.ProjectId, 100)
+	provider, err := sc.New(&k, f)
+	if err != nil {
+		t.Fatalf("Could not create Provider: %v", err)
+	}
+
+	r, err := manager.NewRecoverer(provider, rc)
+	if err != nil {
+		t.Fatalf("Could not create Recoverer: %v", err)
+	}
+
+	b := bmdb.BMDB{
+		Config: bmdb.Config{
+			Database:      component.CockroachConfig{InMemory: true},
+			ComponentName: "test",
+			RuntimeInfo:   "test",
+		},
+	}
+	conn, err := b.Open(true)
+	if err != nil {
+		t.Fatalf("Could not create in-memory BMDB: %v", err)
+	}
+
+	// Run the control loop in the background; ctx is cancelled at test cleanup.
+	ctx, ctxC := context.WithCancel(context.Background())
+	t.Cleanup(ctxC)
+
+	go manager.RunControlLoop(ctx, conn, r)
+
+	return &recovererDut{f: f, r: r, bmdb: conn, ctx: ctx}
+}
+
+// TestRecoverySmokes makes sure that the Recoverer doesn't go up in flames on
+// the happy path: a machine whose agent start is ten hours old must get
+// rebooted and then become available for agent start again.
+func TestRecoverySmokes(t *testing.T) {
+	dut := newRecovererDut(t)
+	f, ctx, conn := dut.f, dut.ctx, dut.bmdb
+
+	// Fail loudly here instead of panicking on reservations[0] below.
+	reservations, err := f.ListReservations(ctx, "fake")
+	if err != nil || len(reservations) == 0 {
+		t.Fatalf("Could not get a reservation to use: got %d, err: %v", len(reservations), err)
+	}
+
+	sess, err := conn.StartSession(ctx)
+	if err != nil {
+		t.Fatalf("Failed to create BMDB session: %v", err)
+	}
+
+	// Create test machine that should be selected for recovery.
+	// First in Fakequinix...
+	dev, err := f.CreateDevice(ctx, &packngo.DeviceCreateRequest{
+		Hostname:              "test-devices",
+		OS:                    "fake",
+		ProjectID:             "fake",
+		HardwareReservationID: reservations[0].ID,
+		ProjectSSHKeys:        []string{},
+	})
+	if err != nil {
+		t.Fatalf("Failed to create test device: %v", err)
+	}
+	// ... and in BMDB.
+	err = sess.Transact(ctx, func(q *model.Queries) error {
+		machine, err := q.NewMachine(ctx)
+		if err != nil {
+			return err
+		}
+		err = q.MachineAddProvided(ctx, model.MachineAddProvidedParams{
+			MachineID:  machine.MachineID,
+			Provider:   model.ProviderEquinix,
+			ProviderID: dev.ID,
+		})
+		if err != nil {
+			return err
+		}
+		return q.MachineSetAgentStarted(ctx, model.MachineSetAgentStartedParams{
+			MachineID:      machine.MachineID,
+			AgentStartedAt: time.Now().Add(time.Hour * -10),
+			AgentPublicKey: []byte("fakefakefakefake"),
+		})
+	})
+	if err != nil {
+		t.Fatalf("Failed to create test machine: %v", err)
+	}
+
+	// Expect to find 0 machines needing recovery.
+	deadline := time.Now().Add(10 * time.Second)
+	for {
+		if time.Now().After(deadline) {
+			t.Fatalf("Machines did not get processed in time")
+		}
+		time.Sleep(100 * time.Millisecond)
+
+		var machines []model.MachineProvided
+		err = sess.Transact(ctx, func(q *model.Queries) error {
+			var err error
+			machines, err = q.GetMachineForAgentRecovery(ctx, model.GetMachineForAgentRecoveryParams{Limit: 100, Provider: model.ProviderEquinix})
+			return err
+		})
+		if err != nil {
+			t.Fatalf("Failed to run Transaction: %v", err)
+		}
+		if len(machines) == 0 {
+			break
+		}
+	}
+
+	// Expect the target machine to have been rebooted.
+	dut.f.mu.Lock()
+	reboots := dut.f.reboots[dev.ID]
+	dut.f.mu.Unlock()
+	if want, got := 1, reboots; want != got {
+		t.Fatalf("Wanted %d reboot, got %d", want, got)
+	}
+
+	// Expect machine to now be available again for agent start.
+	var machines []model.MachineProvided
+	err = sess.Transact(ctx, func(q *model.Queries) error {
+		var err error
+		machines, err = q.GetMachinesForAgentStart(ctx, model.GetMachinesForAgentStartParams{Limit: 100, Provider: model.ProviderEquinix})
+		return err
+	})
+	if err != nil {
+		t.Fatalf("Failed to run Transaction: %v", err)
+	}
+	if want, got := 1, len(machines); want != got {
+		t.Fatalf("Wanted %d machine ready for agent start, got %d", want, got)
+	}
+}