cloud/shepherd/equinix: implement initializer parallelism
This adds a flag which allows starting multiple initializers in
parallel, sharing the same SSH key/config and API rate limiting.
Change-Id: I415e855d9b649fac258e25d884cac17f895c91c0
Reviewed-on: https://review.monogon.dev/c/monogon/+/1135
Tested-by: Jenkins CI
Reviewed-by: Mateusz Zalega <mateusz@monogon.tech>
diff --git a/cloud/shepherd/equinix/manager/BUILD.bazel b/cloud/shepherd/equinix/manager/BUILD.bazel
index 004cd24..2c702fe 100644
--- a/cloud/shepherd/equinix/manager/BUILD.bazel
+++ b/cloud/shepherd/equinix/manager/BUILD.bazel
@@ -23,6 +23,7 @@
"@io_k8s_klog_v2//:klog",
"@org_golang_google_protobuf//proto",
"@org_golang_x_crypto//ssh",
+ "@org_golang_x_sync//errgroup",
"@org_golang_x_time//rate",
],
)
diff --git a/cloud/shepherd/equinix/manager/initializer.go b/cloud/shepherd/equinix/manager/initializer.go
index 5b1ea49..6194f0d 100644
--- a/cloud/shepherd/equinix/manager/initializer.go
+++ b/cloud/shepherd/equinix/manager/initializer.go
@@ -16,6 +16,7 @@
"github.com/google/uuid"
"github.com/packethost/packngo"
"golang.org/x/crypto/ssh"
+ "golang.org/x/sync/errgroup"
"golang.org/x/time/rate"
"google.golang.org/protobuf/proto"
"k8s.io/klog/v2"
@@ -76,6 +77,15 @@
// DBQueryLimiter limits the rate at which BMDB is queried for servers ready
// for BMaaS agent initialization. Must be set.
DBQueryLimiter *rate.Limiter
+
+ // Parallelism is how many instances of the Initializer will be allowed to run in
+ // parallel against the BMDB. This speeds up the process of starting/restarting
+ // agents significantly, as one initializer instance can handle at most one agent
+ // (re)starting process.
+ //
+ // If not set (i.e. 0), defaults to 1. A good starting value for production
+ // deployments is 10 or so.
+ Parallelism int
}
// flagLimiter configures a *rate.Limiter as a flag.
@@ -106,6 +116,7 @@
func (i *InitializerConfig) RegisterFlags() {
flagLimiter(&i.DBQueryLimiter, "initializer_db_query_rate", "250ms,8", "Rate limiting for BMDB queries")
+ flag.IntVar(&i.Parallelism, "initializer_parallelism", 1, "How many initializer instances to run in parallel, ie. how many agents to attempt to (re)start at once")
}
// Initializer implements the BMaaS agent initialization process. Initialization
@@ -158,6 +169,9 @@
if c.DBQueryLimiter == nil {
return nil, fmt.Errorf("DBQueryLimiter must be configured")
}
+ if c.Parallelism == 0 {
+ c.Parallelism = 1
+ }
return &Initializer{
config: c,
sharedConfig: sc,
@@ -167,9 +181,21 @@
}, nil
}
+// Run starts config.Parallelism initializer workers (see runOne) against the
+// given BMDB connection and blocks the current goroutine until all of them have
+// quit.
+//
+// The workers share a context derived from ctx: when ctx expires, or when any
+// worker returns an error, that context is canceled for the remaining workers.
+// The first non-nil error returned by a worker is returned.
+//
+// NOTE(review): every worker calls sharedConfig.sshSigner() on startup; confirm
+// that it is safe for concurrent use.
+func (c *Initializer) Run(ctx context.Context, conn *bmdb.Connection) error {
+	// Use a context-aware group so that one failed worker stops its siblings
+	// instead of leaving Run blocked on them until ctx expires.
+	eg, ctx := errgroup.WithContext(ctx)
+	for n := 0; n < c.config.Parallelism; n++ {
+		eg.Go(func() error {
+			return c.runOne(ctx, conn)
+		})
+	}
+	return eg.Wait()
+}
+
// Run the initializer blocking the current goroutine until the given context
// expires.
-func (c *Initializer) Run(ctx context.Context, conn *bmdb.Connection) error {
+func (c *Initializer) runOne(ctx context.Context, conn *bmdb.Connection) error {
signer, err := c.sharedConfig.sshSigner()
if err != nil {
return fmt.Errorf("could not initialize signer: %w", err)
diff --git a/go.mod b/go.mod
index 0c1ee62..9ec5608 100644
--- a/go.mod
+++ b/go.mod
@@ -122,6 +122,7 @@
golang.org/x/crypto v0.0.0-20220517005047-85d78b3ac167
golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3
golang.org/x/net v0.0.0-20220425223048-2871e0cb64e4
+ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c
golang.org/x/sys v0.0.0-20220804214406-8e32c043e418
golang.org/x/text v0.3.7
golang.org/x/time v0.0.0-20220224211638-0e9765cccd65
@@ -378,7 +379,6 @@
golang.org/x/arch v0.0.0-20190927153633-4e8777c89be4 // indirect
golang.org/x/exp v0.0.0-20220428152302-39d4317da171 // indirect
golang.org/x/oauth2 v0.0.0-20220411215720-9780585627b5 // indirect
- golang.org/x/sync v0.0.0-20210220032951-036812b2e83c // indirect
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect
golang.org/x/xerrors v0.0.0-20220411194840-2f41105eb62f // indirect
golang.zx2c4.com/wireguard v0.0.0-20220202223031-3b95c81cc178 // indirect