m/n/c/curator: share locks across leader service instances
This is needed to lock access to nodes between the Curator and
Management service instances.
Change-Id: I4609d87a961339235a13af57236f80c9976819ed
Reviewed-on: https://review.monogon.dev/c/monogon/+/339
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
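
For illustration, a minimal, runnable sketch of the pattern this change introduces, with heavily simplified types: a single leadership value (and its muNodes mutex) is shared by pointer across all leader service structs, so read-modify-write operations on node data from either the Curator or the Management service serialize on the same lock. The in-memory node map and the UpdateNodeStatus/ApproveNode methods below are hypothetical stand-ins for the real etcd-backed operations and do not appear in the actual codebase.

package main

import (
	"fmt"
	"sync"
)

// leadership holds state shared by all leader service implementations.
type leadership struct {
	// muNodes guards read-modify-write operations on node data.
	muNodes sync.Mutex
	// nodes stands in for node data kept in etcd.
	nodes map[string]string
}

// leaderCurator implements the Curator service as the leader.
type leaderCurator struct {
	*leadership
}

// leaderManagement implements the Management service as the leader.
type leaderManagement struct {
	*leadership
}

// UpdateNodeStatus is a hypothetical Curator RMW operation on node data.
func (c *leaderCurator) UpdateNodeStatus(id, status string) {
	c.muNodes.Lock()
	defer c.muNodes.Unlock()
	cur := c.nodes[id]
	c.nodes[id] = cur + ";status=" + status
}

// ApproveNode is a hypothetical Management RMW operation on the same data.
func (m *leaderManagement) ApproveNode(id string) {
	m.muNodes.Lock()
	defer m.muNodes.Unlock()
	cur := m.nodes[id]
	m.nodes[id] = cur + ";approved"
}

func main() {
	// One leadership instance is shared, by pointer, across both services,
	// mirroring newCuratorLeader(l *leadership) in the actual change.
	l := &leadership{nodes: map[string]string{"node-a": "new"}}
	curator := &leaderCurator{leadership: l}
	management := &leaderManagement{leadership: l}

	var wg sync.WaitGroup
	wg.Add(2)
	go func() { defer wg.Done(); curator.UpdateNodeStatus("node-a", "up") }()
	go func() { defer wg.Done(); management.ApproveNode("node-a") }()
	wg.Wait()

	fmt.Println(l.nodes["node-a"])
}

The point mirrored from the actual change is that newCuratorLeader now takes *leadership rather than a value, so the embedded services alias the same mutex instead of each holding its own copy.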
diff --git a/metropolis/node/core/curator/impl_leader.go b/metropolis/node/core/curator/impl_leader.go
index 7767e48..04665db 100644
--- a/metropolis/node/core/curator/impl_leader.go
+++ b/metropolis/node/core/curator/impl_leader.go
@@ -4,6 +4,7 @@
"context"
"errors"
"fmt"
+ "sync"
"go.etcd.io/etcd/clientv3"
"google.golang.org/grpc/codes"
@@ -24,6 +25,15 @@
// etcd is the etcd client in which curator data and leader election state is
// stored.
etcd client.Namespaced
+
+ // muNodes guards any changes to nodes, and prevents race conditions where the
+ // curator performs a read-modify-write operation to node data. The curator's
+ // leadership ensures no two curators run simultaneously, and this lock ensures
+ // no two parallel curator operations race each other.
+ //
+ // This lock has to be taken any time such an RMW operation takes place when not
+ // additionally guarded using etcd transactions.
+ muNodes sync.Mutex
}
var (
@@ -75,7 +85,7 @@
leaderManagement
}
-func newCuratorLeader(l leadership) *curatorLeader {
+func newCuratorLeader(l *leadership) *curatorLeader {
return &curatorLeader{
leaderCurator{leadership: l},
leaderAAA{leadership: l},
diff --git a/metropolis/node/core/curator/impl_leader_aaa.go b/metropolis/node/core/curator/impl_leader_aaa.go
index dd0d546..43e325c 100644
--- a/metropolis/node/core/curator/impl_leader_aaa.go
+++ b/metropolis/node/core/curator/impl_leader_aaa.go
@@ -23,7 +23,7 @@
)
type leaderAAA struct {
- leadership
+ *leadership
}
// getOwnerPubkey returns the public key of the configured owner of the cluster.
diff --git a/metropolis/node/core/curator/impl_leader_curator.go b/metropolis/node/core/curator/impl_leader_curator.go
index 5a15c94..fd8602d 100644
--- a/metropolis/node/core/curator/impl_leader_curator.go
+++ b/metropolis/node/core/curator/impl_leader_curator.go
@@ -2,7 +2,6 @@
import (
"context"
- "sync"
"go.etcd.io/etcd/clientv3"
"google.golang.org/grpc/codes"
@@ -18,16 +17,7 @@
// leaderCurator implements the Curator gRPC API (cpb.Curator) as a curator
// leader.
type leaderCurator struct {
- leadership
-
- // muNodes guards any changes to nodes, and prevents race conditions where the
- // curator performs a read-modify-write operation to node data. The curator's
- // leadership ensure no two curators run simultaneously, and this lock ensures
- // no two parallel curator operations race eachother.
- //
- // This lock has to be taken any time such RMW operation takes place when not
- // additionally guarded using etcd transactions.
- muNodes sync.Mutex
+ *leadership
}
// Watch returns a stream of updates concerning some part of the cluster
diff --git a/metropolis/node/core/curator/impl_leader_management.go b/metropolis/node/core/curator/impl_leader_management.go
index 7284eb0..0f6b55c 100644
--- a/metropolis/node/core/curator/impl_leader_management.go
+++ b/metropolis/node/core/curator/impl_leader_management.go
@@ -14,7 +14,7 @@
)
type leaderManagement struct {
- leadership
+ *leadership
}
const (
diff --git a/metropolis/node/core/curator/impl_leader_test.go b/metropolis/node/core/curator/impl_leader_test.go
index d20541e..4318a1f 100644
--- a/metropolis/node/core/curator/impl_leader_test.go
+++ b/metropolis/node/core/curator/impl_leader_test.go
@@ -57,7 +57,7 @@
// Build a curator leader object. This implements methods that will be
// exercised by tests.
- leader := newCuratorLeader(leadership{
+ leader := newCuratorLeader(&leadership{
lockKey: lockKey,
lockRev: lockRev,
etcd: cl,
diff --git a/metropolis/node/core/curator/listener.go b/metropolis/node/core/curator/listener.go
index a190167..76bc1a5 100644
--- a/metropolis/node/core/curator/listener.go
+++ b/metropolis/node/core/curator/listener.go
@@ -138,7 +138,14 @@
t.ctxC = &implCtxC
if leader := status.leader; leader != nil {
supervisor.Logger(ctx).Info("Dispatcher switching over to local leader")
- t.impl = newCuratorLeader(leadership{
+ // Create a new leadership and pass it to all leader service instances.
+ //
+ // This shares the leadership locks across all of them. Each time we regain
+ // leadership, a new set of locks is created - this is fine, as even if we
+ // happen to have two instances of the leader running (an old one still hanging
+ // on a lock and a new one with the lock freed), the previous leader will fail on
+ // txnAsLeader due to the leadership being outdated.
+ t.impl = newCuratorLeader(&leadership{
lockKey: leader.lockKey,
lockRev: leader.lockRev,
etcd: l.etcd,
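
As context for the guard the comment above relies on, here is a hedged sketch of how a stale leader's writes can be rejected via an etcd transaction conditioned on the leader election lock. commitAsLeader is an illustrative helper built from the lockKey/lockRev fields visible in this diff; whether the real txnAsLeader compares CreateRevision in exactly this way is an assumption, and its actual implementation may differ.

package sketch

import (
	"context"
	"fmt"

	"go.etcd.io/etcd/clientv3"
)

// commitAsLeader applies ops only if lockKey still exists at the revision it
// was created with, i.e. only if this process still holds the leader election
// lock. A stale leader instance - even one that was blocked on muNodes while a
// newer leader took over - cannot commit.
func commitAsLeader(ctx context.Context, kv clientv3.KV, lockKey string, lockRev int64, ops ...clientv3.Op) error {
	resp, err := kv.Txn(ctx).
		If(clientv3.Compare(clientv3.CreateRevision(lockKey), "=", lockRev)).
		Then(ops...).
		Commit()
	if err != nil {
		return err
	}
	if !resp.Succeeded {
		// Leadership was lost in the meantime; the outdated leader's write is
		// dropped instead of overwriting state owned by the new leader.
		return fmt.Errorf("lost leadership: lock %q no longer at revision %d", lockKey, lockRev)
	}
	return nil
}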