m/n/core/clusternet: avoid spurious updates, log more
This should make debugging
https://github.com/monogon-dev/monogon/issues/235 easier, as I haven't
been able to replicate it locally.
Change-Id: I23f1a1d3d22841558e0db3e32b76b8bb8319fd3d
Reviewed-on: https://review.monogon.dev/c/monogon/+/1876
Tested-by: Jenkins CI
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/metropolis/node/core/clusternet/clusternet.go b/metropolis/node/core/clusternet/clusternet.go
index dea0f89..2e01c77 100644
--- a/metropolis/node/core/clusternet/clusternet.go
+++ b/metropolis/node/core/clusternet/clusternet.go
@@ -103,14 +103,32 @@
supervisor.Signal(ctx, supervisor.SignalHealthy)
var kubePrefixes *Prefixes
+ var prevKubePrefixes *Prefixes
+
var localAddr net.IP
+ var prevLocalAddr net.IP
+
for {
+ kubeChanged := false
+ localChanged := false
+
select {
case <-ctx.Done():
return ctx.Err()
case kubePrefixes = <-kubeC:
+ if !kubePrefixes.Equal(prevKubePrefixes) {
+ kubeChanged = true
+ }
case n := <-netC:
localAddr = n.ExternalAddress
+ if !localAddr.Equal(prevLocalAddr) {
+ localChanged = true
+ }
+ }
+
+ // Ignore spurious updates.
+ if !localChanged && !kubeChanged {
+ continue
}
// Prepare prefixes to submit to cluster.
@@ -128,7 +146,7 @@
prefixes.Update(kubePrefixes)
}
- supervisor.Logger(ctx).Infof("Submitting prefixes: %s", prefixes)
+ supervisor.Logger(ctx).Infof("Submitting prefixes: %s (kube update: %v, local update: %v)", prefixes, kubeChanged, localChanged)
err := backoff.Retry(func() error {
_, err := s.Curator.UpdateNodeClusterNetworking(ctx, &apb.UpdateNodeClusterNetworkingRequest{
@@ -145,6 +163,10 @@
if err != nil {
return fmt.Errorf("couldn't update curator: %w", err)
}
+
+ prevKubePrefixes = kubePrefixes
+ prevLocalAddr = localAddr
+
}
}
@@ -200,6 +222,12 @@
} else {
succeeded = len(newNodes)
}
- supervisor.Logger(ctx).Infof("Successfully updated %d out of %d nodes", succeeded, len(newNodes))
+
+ if len(newNodes) != 0 {
+ supervisor.Logger(ctx).Infof("Successfully updated %d out of %d nodes", succeeded, len(newNodes))
+
+ numNodes, numPrefixes := nodes.stats()
+ supervisor.Logger(ctx).Infof("Total: %d nodes, %d prefixes.", numNodes, numPrefixes)
+ }
}
}
diff --git a/metropolis/node/core/clusternet/types.go b/metropolis/node/core/clusternet/types.go
index 0d776f2..088cf8e 100644
--- a/metropolis/node/core/clusternet/types.go
+++ b/metropolis/node/core/clusternet/types.go
@@ -44,13 +44,22 @@
// String returns a stringified, comma-delimited representation of the prefixes.
func (p *Prefixes) String() string {
+ if p == nil {
+ return ""
+ }
+
var strs []string
for _, pp := range *p {
strs = append(strs, pp.String())
}
+ sort.Strings(strs)
return strings.Join(strs, ", ")
}
+func (p *Prefixes) Equal(o *Prefixes) bool {
+ return p.String() == o.String()
+}
+
// node is used for internal statekeeping in the cluster networking service.
type node struct {
id string
@@ -107,6 +116,16 @@
}
}
+func (n *nodeMap) stats() (nodes int, prefixes int) {
+ nodes = len(n.nodes)
+
+ for _, node := range n.nodes {
+ prefixes += len(node.prefixes)
+ }
+
+ return
+}
+
// update updates the nodeMap from the given Curator WatchEvent, interpreting
// both node changes and deletions. Two nodeMaps are returned: the first one
// contains only nodes that have been added/changed by the given event, the other