m/n/c/curator: share locks across leader service instances
This is needed to lock access to nodes between the Curator and
Management service instances.
Change-Id: I4609d87a961339235a13af57236f80c9976819ed
Reviewed-on: https://review.monogon.dev/c/monogon/+/339
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
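
For illustration, a minimal, runnable sketch of the pattern this change introduces, with heavily simplified types: a single leadership value (and its muNodes mutex) is shared by pointer across all leader service structs, so read-modify-write operations on node data from either the Curator or the Management service serialize on the same lock. The in-memory node map and the UpdateNodeStatus/ApproveNode methods below are hypothetical stand-ins for the real etcd-backed operations and do not appear in the actual codebase.

package main

import (
	"fmt"
	"sync"
)

// leadership holds state shared by all leader service implementations.
type leadership struct {
	// muNodes guards read-modify-write operations on node data.
	muNodes sync.Mutex
	// nodes stands in for node data kept in etcd.
	nodes map[string]string
}

// leaderCurator implements the Curator service as the leader.
type leaderCurator struct {
	*leadership
}

// leaderManagement implements the Management service as the leader.
type leaderManagement struct {
	*leadership
}

// UpdateNodeStatus is a hypothetical Curator RMW operation on node data.
func (c *leaderCurator) UpdateNodeStatus(id, status string) {
	c.muNodes.Lock()
	defer c.muNodes.Unlock()
	cur := c.nodes[id]
	c.nodes[id] = cur + ";status=" + status
}

// ApproveNode is a hypothetical Management RMW operation on the same data.
func (m *leaderManagement) ApproveNode(id string) {
	m.muNodes.Lock()
	defer m.muNodes.Unlock()
	cur := m.nodes[id]
	m.nodes[id] = cur + ";approved"
}

func main() {
	// One leadership instance is shared, by pointer, across both services,
	// mirroring newCuratorLeader(l *leadership) in the actual change.
	l := &leadership{nodes: map[string]string{"node-a": "new"}}
	curator := &leaderCurator{leadership: l}
	management := &leaderManagement{leadership: l}

	var wg sync.WaitGroup
	wg.Add(2)
	go func() { defer wg.Done(); curator.UpdateNodeStatus("node-a", "up") }()
	go func() { defer wg.Done(); management.ApproveNode("node-a") }()
	wg.Wait()

	fmt.Println(l.nodes["node-a"])
}

The point mirrored from the actual change is that newCuratorLeader now takes *leadership rather than a value, so the embedded services alias the same mutex instead of each holding its own copy.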
diff --git a/metropolis/node/core/curator/impl_leader.go b/metropolis/node/core/curator/impl_leader.go
index 7767e48..04665db 100644
--- a/metropolis/node/core/curator/impl_leader.go
+++ b/metropolis/node/core/curator/impl_leader.go
@@ -4,6 +4,7 @@
"context"
"errors"
"fmt"
+ "sync"
"go.etcd.io/etcd/clientv3"
"google.golang.org/grpc/codes"
@@ -24,6 +25,15 @@
// etcd is the etcd client in which curator data and leader election state is
// stored.
etcd client.Namespaced
+
+ // muNodes guards any changes to nodes, and prevents race conditions where the
+ // curator performs a read-modify-write operation to node data. The curator's
+ // leadership ensures no two curators run simultaneously, and this lock ensures
+ // no two parallel curator operations race each other.
+ //
+ // This lock has to be taken any time such an RMW operation takes place when not
+ // additionally guarded using etcd transactions.
+ muNodes sync.Mutex
}
var (
@@ -75,7 +85,7 @@
leaderManagement
}
-func newCuratorLeader(l leadership) *curatorLeader {
+func newCuratorLeader(l *leadership) *curatorLeader {
return &curatorLeader{
leaderCurator{leadership: l},
leaderAAA{leadership: l},
diff --git a/metropolis/node/core/curator/impl_leader_aaa.go b/metropolis/node/core/curator/impl_leader_aaa.go
index dd0d546..43e325c 100644
--- a/metropolis/node/core/curator/impl_leader_aaa.go
+++ b/metropolis/node/core/curator/impl_leader_aaa.go
@@ -23,7 +23,7 @@
)
type leaderAAA struct {
- leadership
+ *leadership
}
// getOwnerPubkey returns the public key of the configured owner of the cluster.
diff --git a/metropolis/node/core/curator/impl_leader_curator.go b/metropolis/node/core/curator/impl_leader_curator.go
index 5a15c94..fd8602d 100644
--- a/metropolis/node/core/curator/impl_leader_curator.go
+++ b/metropolis/node/core/curator/impl_leader_curator.go
@@ -2,7 +2,6 @@
import (
"context"
- "sync"
"go.etcd.io/etcd/clientv3"
"google.golang.org/grpc/codes"
@@ -18,16 +17,7 @@
// leaderCurator implements the Curator gRPC API (cpb.Curator) as a curator
// leader.
type leaderCurator struct {
- leadership
-
- // muNodes guards any changes to nodes, and prevents race conditions where the
- // curator performs a read-modify-write operation to node data. The curator's
- // leadership ensure no two curators run simultaneously, and this lock ensures
- // no two parallel curator operations race eachother.
- //
- // This lock has to be taken any time such RMW operation takes place when not
- // additionally guarded using etcd transactions.
- muNodes sync.Mutex
+ *leadership
}
// Watch returns a stream of updates concerning some part of the cluster
diff --git a/metropolis/node/core/curator/impl_leader_management.go b/metropolis/node/core/curator/impl_leader_management.go
index 7284eb0..0f6b55c 100644
--- a/metropolis/node/core/curator/impl_leader_management.go
+++ b/metropolis/node/core/curator/impl_leader_management.go
@@ -14,7 +14,7 @@
)
type leaderManagement struct {
- leadership
+ *leadership
}
const (
diff --git a/metropolis/node/core/curator/impl_leader_test.go b/metropolis/node/core/curator/impl_leader_test.go
index d20541e..4318a1f 100644
--- a/metropolis/node/core/curator/impl_leader_test.go
+++ b/metropolis/node/core/curator/impl_leader_test.go
@@ -57,7 +57,7 @@
// Build a curator leader object. This implements methods that will be
// exercised by tests.
- leader := newCuratorLeader(leadership{
+ leader := newCuratorLeader(&leadership{
lockKey: lockKey,
lockRev: lockRev,
etcd: cl,
diff --git a/metropolis/node/core/curator/listener.go b/metropolis/node/core/curator/listener.go
index a190167..76bc1a5 100644
--- a/metropolis/node/core/curator/listener.go
+++ b/metropolis/node/core/curator/listener.go
@@ -138,7 +138,14 @@
t.ctxC = &implCtxC
if leader := status.leader; leader != nil {
supervisor.Logger(ctx).Info("Dispatcher switching over to local leader")
- t.impl = newCuratorLeader(leadership{
+ // Create a new leadership and pass it to all leader service instances.
+ //
+ // This shares the leadership locks across all of them. Each time we regain
+ // leadership, a new set of locks is created - this is fine, as even if we
+ // happen to have two instances of the leader running (an old one still hanging
+ // on a lock and a new one with the lock freed), the previous leader will fail on
+ // txnAsLeader due to the leadership being outdated.
+ t.impl = newCuratorLeader(&leadership{
lockKey: leader.lockKey,
lockRev: leader.lockRev,
etcd: l.etcd,
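
As context for the guard the comment above relies on, here is a hedged sketch of how a stale leader's writes can be rejected via an etcd transaction conditioned on the leader election lock. commitAsLeader is an illustrative helper built from the lockKey/lockRev fields visible in this diff; whether the real txnAsLeader compares CreateRevision in exactly this way is an assumption, and its actual implementation may differ.

package sketch

import (
	"context"
	"fmt"

	"go.etcd.io/etcd/clientv3"
)

// commitAsLeader applies ops only if lockKey still exists at the revision it
// was created with, i.e. only if this process still holds the leader election
// lock. A stale leader instance - even one that was blocked on muNodes while a
// newer leader took over - cannot commit.
func commitAsLeader(ctx context.Context, kv clientv3.KV, lockKey string, lockRev int64, ops ...clientv3.Op) error {
	resp, err := kv.Txn(ctx).
		If(clientv3.Compare(clientv3.CreateRevision(lockKey), "=", lockRev)).
		Then(ops...).
		Commit()
	if err != nil {
		return err
	}
	if !resp.Succeeded {
		// Leadership was lost in the meantime; the outdated leader's write is
		// dropped instead of overwriting state owned by the new leader.
		return fmt.Errorf("lost leadership: lock %q no longer at revision %d", lockKey, lockRev)
	}
	return nil
}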