c/s/e/manager: add sleep after reboot
This is required as Equinix doesn't reboot the machines synchronously
during the API call.
Change-Id: Ie01b7ed5c57868e1c60a33af934a50e7338ce3ac
Reviewed-on: https://review.monogon.dev/c/monogon/+/1595
Tested-by: Jenkins CI
Reviewed-by: Serge Bazanski <serge@monogon.tech>
diff --git a/cloud/shepherd/equinix/manager/recoverer.go b/cloud/shepherd/equinix/manager/recoverer.go
index f323d03..4ec73af 100644
--- a/cloud/shepherd/equinix/manager/recoverer.go
+++ b/cloud/shepherd/equinix/manager/recoverer.go
@@ -2,7 +2,9 @@
import (
"context"
+ "flag"
"fmt"
+ "time"
"k8s.io/klog/v2"
@@ -12,10 +14,12 @@
type RecovererConfig struct {
ControlLoopConfig
+ RebootWaitSeconds int
}
func (r *RecovererConfig) RegisterFlags() {
r.ControlLoopConfig.RegisterFlags("recoverer")
+ flag.IntVar(&r.RebootWaitSeconds, "recoverer_reboot_wait_seconds", 30, "How many seconds to sleep to ensure a reboot happened")
}
// The Recoverer reboots machines whose agent has stopped sending heartbeats or
@@ -47,6 +51,15 @@
return fmt.Errorf("failed to reboot device: %w", err)
}
+ // TODO(issue/215): replace this
+ // This is required as Equinix doesn't reboot the machines synchronously
+ // during the API call.
+ select {
+ case <-time.After(time.Duration(r.RebootWaitSeconds) * time.Second):
+ case <-ctx.Done():
+ return fmt.Errorf("while waiting for reboot: %w", ctx.Err())
+ }
+
klog.Infof("Removing AgentStarted/AgentHeartbeat (ID: %s, PID: %s)...", t.machine.MachineID, t.machine.ProviderID)
err := t.work.Finish(ctx, func(q *model.Queries) error {
if err := q.MachineDeleteAgentStarted(ctx, t.machine.MachineID); err != nil {