m/n/c/curator: log leadership key details when transaction gets aborted due to leadership loss
The trace now includes the lock key, the expected revision and the revision
found in etcd. This should allow us to debug
https://github.com/monogon-dev/monogon/issues/276 a bit further.
Change-Id: If709858df15de67f19beffc8e14b0ab09bba89c8
Reviewed-on: https://review.monogon.dev/c/monogon/+/2341
Tested-by: Jenkins CI
Reviewed-by: Tim Windelschmidt <tim@monogon.tech>
diff --git a/metropolis/node/core/curator/impl_leader.go b/metropolis/node/core/curator/impl_leader.go
index 2a66e29..177d436 100644
--- a/metropolis/node/core/curator/impl_leader.go
+++ b/metropolis/node/core/curator/impl_leader.go
@@ -102,7 +102,21 @@
return nil, fmt.Errorf("when running leader transaction: %w", err)
}
if !resp.Succeeded {
- rpc.Trace(ctx).Printf("txnAsLeader(...): rejected (lost leadership)")
+ // Transaction failed because leadership was lost. Emit a trace with
+ // the lock key, the expected revision and the revision actually found
+ // to aid debugging.
+ checkRes, err := l.etcd.Get(ctx, l.lockKey)
+ var lockRev string
+ if err != nil {
+ lockRev = fmt.Sprintf("couldn't check: %v", err)
+ } else {
+ if len(checkRes.Kvs) > 0 {
+ lockRev = fmt.Sprintf("%d", checkRes.Kvs[0].CreateRevision)
+ } else {
+ lockRev = "no revision?"
+ }
+ }
+ rpc.Trace(ctx).Printf("txnAsLeader(...): rejected (lost leadership (key %s should've been at rev %d, is at rev %s)", l.lockKey, l.lockRev, lockRev)
return nil, lostLeadership
}
rpc.Trace(ctx).Printf("txnAsLeader(...): ok")