m/n/core: automatically update ClusterDirectory
This extends the hostsfile service to also update ClusterDirectory
whenever cluster member address information is received.
Change-Id: I30dcd15ba4a59f13e48501ff1032c189e2e961af
Reviewed-on: https://review.monogon.dev/c/monogon/+/662
Reviewed-by: Sergiusz Bazanski <serge@monogon.tech>
diff --git a/metropolis/node/core/main.go b/metropolis/node/core/main.go
index 3254f2b..0af18e9 100644
--- a/metropolis/node/core/main.go
+++ b/metropolis/node/core/main.go
@@ -151,6 +151,7 @@
Roleserver: rs,
Network: networkSvc,
Ephemeral: &root.Ephemeral,
+ ESP: &root.ESP,
},
}
if err := supervisor.Run(ctx, "hostsfile", hostsfileSvc.Run); err != nil {
diff --git a/metropolis/node/core/network/hostsfile/BUILD.bazel b/metropolis/node/core/network/hostsfile/BUILD.bazel
index 7c0b117..14b5701 100644
--- a/metropolis/node/core/network/hostsfile/BUILD.bazel
+++ b/metropolis/node/core/network/hostsfile/BUILD.bazel
@@ -11,7 +11,9 @@
"//metropolis/node/core/network",
"//metropolis/node/core/roleserve",
"//metropolis/pkg/supervisor",
+ "//metropolis/proto/common",
"@org_golang_google_grpc//:go_default_library",
+ "@org_golang_google_protobuf//proto",
"@org_golang_x_sys//unix",
],
)
diff --git a/metropolis/node/core/network/hostsfile/hostsfile.go b/metropolis/node/core/network/hostsfile/hostsfile.go
index 069616f..8b5eb9a 100644
--- a/metropolis/node/core/network/hostsfile/hostsfile.go
+++ b/metropolis/node/core/network/hostsfile/hostsfile.go
@@ -6,6 +6,8 @@
// 2. The local node's name is written into /etc/machine-id.
// 3. The local node's name is set as the UNIX hostname of the machine (via the
// sethostname call).
+// 4. The local node's ClusterDirectory is updated with the addresses used in
+// /etc/hosts, excluding the local node's own address.
//
// The hostsfile Service can start up in two modes: with cluster connectivity
// and without cluster connectivity. Without cluster connectivity, only
@@ -23,12 +25,14 @@
"golang.org/x/sys/unix"
"google.golang.org/grpc"
+ "google.golang.org/protobuf/proto"
ipb "source.monogon.dev/metropolis/node/core/curator/proto/api"
"source.monogon.dev/metropolis/node/core/localstorage"
"source.monogon.dev/metropolis/node/core/network"
"source.monogon.dev/metropolis/node/core/roleserve"
"source.monogon.dev/metropolis/pkg/supervisor"
+ cpb "source.monogon.dev/metropolis/proto/common"
)
type Config struct {
@@ -38,6 +42,8 @@
// Ephemeral is the root of the ephemeral storage of the node, into which the
// service will write its managed files.
Ephemeral *localstorage.EphemeralDirectory
+ // ESP is the root of the node's EFI System Partition.
+ ESP *localstorage.ESPDirectory
// Roleserver is an instance of the roleserver service which will be queried for
// ClusterMembership and a Curator client.
@@ -61,8 +67,17 @@
type ClusterDialer func(ctx context.Context) (*grpc.ClientConn, error)
+// nodeInfo contains all of a single node's data needed to build its entry in
+// either hostsfile or ClusterDirectory.
+type nodeInfo struct {
+ // address is the node's IP address, as written into its hosts entry.
+ address string
+ // local is true for the local node's own entry, which clusterDirectory skips.
+ local bool
+}
+
// nodeMap is a map from node ID (effectively DNS name) to node IP address.
-type nodeMap map[string]string
+type nodeMap map[string]nodeInfo
// hosts generates a complete /etc/hosts file based on the contents of the
// nodeMap. Apart from the addresses in the nodeMap, entries for localhost
@@ -81,7 +96,7 @@
[]byte("::1 localhost"),
}
for _, nid := range nodeIdsSorted {
- addr := m[nid]
+ addr := m[nid].address
line := fmt.Sprintf("%s %s", addr, nid)
supervisor.Logger(ctx).Infof("Hosts entry: %s", line)
lines = append(lines, []byte(line))
@@ -91,6 +106,28 @@
return bytes.Join(lines, []byte("\n"))
}
+// clusterDirectory builds a ClusterDirectory holding one Node per non-local
+// entry of m, in map iteration order. It is empty if no such entry exists.
+func (m nodeMap) clusterDirectory(ctx context.Context) *cpb.ClusterDirectory {
+ var directory cpb.ClusterDirectory
+ for _, ni := range m {
+ // Leave out the local node's own entry.
+ if ni.local {
+ continue
+ }
+
+ supervisor.Logger(ctx).Infof("ClusterDirectory entry: %s", ni.address)
+ addresses := []*cpb.ClusterDirectory_Node_Address{
+ {Host: ni.address},
+ }
+ node := &cpb.ClusterDirectory_Node{
+ Addresses: addresses,
+ }
+ directory.Nodes = append(directory.Nodes, node)
+ }
+ return &directory
+}
+
func (s *Service) Run(ctx context.Context) error {
s.localC = make(chan string)
defer close(s.localC)
@@ -137,11 +174,14 @@
return ctx.Err()
case u := <-s.localC:
// Ignore spurious updates.
- if nodes[nodeID] == u {
+ if nodes[nodeID].address == u {
break
}
supervisor.Logger(ctx).Infof("Got new local address: %s", u)
- nodes[nodeID] = u
+ nodes[nodeID] = nodeInfo{
+ address: u,
+ local: true,
+ }
changed = true
case u := <-s.clusterC:
// Loop through the nodeMap from the cluster subrunnable, making note of what
@@ -152,18 +192,18 @@
// drained/disowned.
//
// MVP: we should at least log removed nodes.
- for id, addr := range u {
+ for id, info := range u {
// We're not interested in what the cluster thinks about our local node, as that
// might be outdated (eg. when we haven't yet reported a new local address to
// the cluster).
if id == nodeID {
continue
}
- if nodes[id] == addr {
+ if nodes[id].address == info.address {
continue
}
- supervisor.Logger(ctx).Infof("Got new cluster address: %s is %s", id, addr)
- nodes[id] = addr
+ supervisor.Logger(ctx).Infof("Got new cluster address: %s is %s", id, info.address)
+ nodes[id] = info
changed = true
}
}
@@ -182,6 +222,17 @@
supervisor.Logger(ctx).Errorf("Failed to self-resolve %q: %v", nodeID, err)
}
+ // Update this node's ClusterDirectory.
+ supervisor.Logger(ctx).Info("Updating ClusterDirectory.")
+ cd := nodes.clusterDirectory(ctx)
+ cdirRaw, err := proto.Marshal(cd)
+ if err != nil {
+ return fmt.Errorf("couldn't marshal ClusterDirectory: %w", err)
+ }
+ if err = s.ESP.Metropolis.ClusterDirectory.Write(cdirRaw, 0644); err != nil {
+ return err
+ }
+ unix.Sync()
}
}
@@ -242,7 +293,10 @@
if n.Status == nil || n.Status.ExternalAddress == "" {
continue
}
- nodes[n.Id] = n.Status.ExternalAddress
+ nodes[n.Id] = nodeInfo{
+ address: n.Status.ExternalAddress,
+ local: false,
+ }
}
for _, t := range ev.NodeTombstones {
delete(nodes, t.NodeId)