// Package hostsfile implements a service which owns and writes all node-local
// files/interfaces used by the system to resolve the local node's name and the
// names of other nodes in the cluster:
//
//  1. All cluster node names are written into /etc/hosts for DNS resolution.
//  2. The local node's name is written into /etc/machine-id.
//  3. The local node's name is set as the UNIX hostname of the machine (via the
//     sethostname call).
//  4. The local node's ClusterDirectory is updated with the addresses of the
//     other cluster nodes, taken from the same data as the one used in
//     /etc/hosts (the local node's own address is not included).
//
// The hostsfile Service can start up in two modes: with cluster connectivity
// and without cluster connectivity. Without cluster connectivity, only
// information about the current node (as retrieved from the network service)
// will be used to populate local data. In cluster mode, information about other
// nodes is also used.
| 17 | package hostsfile |
| 18 | |
| 19 | import ( |
| 20 | "bytes" |
| 21 | "context" |
Serge Bazanski | f73d8a9 | 2021-11-02 21:19:45 +0100 | [diff] [blame] | 22 | "fmt" |
| 23 | "net" |
| 24 | "sort" |
| 25 | |
| 26 | "golang.org/x/sys/unix" |
| 27 | "google.golang.org/grpc" |
Mateusz Zalega | b30a41d | 2022-04-29 17:14:50 +0200 | [diff] [blame] | 28 | "google.golang.org/protobuf/proto" |
Serge Bazanski | f73d8a9 | 2021-11-02 21:19:45 +0100 | [diff] [blame] | 29 | |
| 30 | ipb "source.monogon.dev/metropolis/node/core/curator/proto/api" |
Serge Bazanski | f73d8a9 | 2021-11-02 21:19:45 +0100 | [diff] [blame] | 31 | "source.monogon.dev/metropolis/node/core/localstorage" |
| 32 | "source.monogon.dev/metropolis/node/core/network" |
Serge Bazanski | 6dff6d6 | 2022-01-28 18:15:14 +0100 | [diff] [blame] | 33 | "source.monogon.dev/metropolis/node/core/roleserve" |
Serge Bazanski | f73d8a9 | 2021-11-02 21:19:45 +0100 | [diff] [blame] | 34 | "source.monogon.dev/metropolis/pkg/supervisor" |
Mateusz Zalega | b30a41d | 2022-04-29 17:14:50 +0200 | [diff] [blame] | 35 | cpb "source.monogon.dev/metropolis/proto/common" |
Serge Bazanski | f73d8a9 | 2021-11-02 21:19:45 +0100 | [diff] [blame] | 36 | ) |
| 37 | |
// Config contains the dependencies of the hostsfile Service. It is embedded
// in Service, so its fields are accessed directly on a Service instance.
type Config struct {
	// Network is a handle to the Network service, used to update the hostsfile
	// service with information about the local node's external IP address.
	Network *network.Service
	// Ephemeral is the root of the ephemeral storage of the node, into which the
	// service will write its managed files (the hosts file and the machine-id
	// file).
	Ephemeral *localstorage.EphemeralDirectory
	// ESP is the root of the node's EFI System Partition. The serialized
	// ClusterDirectory is written below it.
	ESP *localstorage.ESPDirectory

	// Roleserver is an instance of the roleserver service which will be queried for
	// ClusterMembership and a Curator client.
	Roleserver *roleserve.Service
}
| 52 | |
// Service is the hostsfile service instance. See package-level documentation
// for more information.
type Service struct {
	Config

	// localC is a channel populated by the local sub-runnable with the newest
	// available information about the local node's address. It is automatically
	// created and closed by Run.
	localC chan string
	// clusterC is a channel populated by the cluster sub-runnable with the newest
	// available information about the cluster nodes. It is automatically created and
	// closed by Run.
	clusterC chan nodeMap
}
| 67 | |
// ClusterDialer is a function which returns a gRPC client connection for the
// given context, or an error.
//
// NOTE(review): nothing in this file references ClusterDialer any more (the
// cluster sub-runnable dials the Curator through the Roleserver's
// ClusterMembership) — confirm whether other packages still depend on it.
type ClusterDialer func(ctx context.Context) (*grpc.ClientConn, error)
| 69 | |
// nodeInfo contains all of a single node's data needed to build its entry in
// either hostsfile or ClusterDirectory.
type nodeInfo struct {
	// address is the node's IP address, in string form.
	address string
	// local is true if address belongs to the local node. Local entries are
	// written to the hosts file but skipped when building the ClusterDirectory.
	local bool
}
| 78 | |
// nodeMap is a map from node ID (effectively DNS name) to the information
// known about that node (its IP address and whether it is the local node).
type nodeMap map[string]nodeInfo
Serge Bazanski | f73d8a9 | 2021-11-02 21:19:45 +0100 | [diff] [blame] | 81 | |
| 82 | // hosts generates a complete /etc/hosts file based on the contents of the |
| 83 | // nodeMap. Apart from the addresses in the nodeMap, entries for localhost |
| 84 | // pointing to 127.0.0.1 and ::1 will also be generated. |
| 85 | func (m nodeMap) hosts(ctx context.Context) []byte { |
| 86 | var nodeIdsSorted []string |
| 87 | for k, _ := range m { |
| 88 | nodeIdsSorted = append(nodeIdsSorted, k) |
| 89 | } |
| 90 | sort.Slice(nodeIdsSorted, func(i, j int) bool { |
| 91 | return nodeIdsSorted[i] < nodeIdsSorted[j] |
| 92 | }) |
| 93 | |
| 94 | lines := [][]byte{ |
| 95 | []byte("127.0.0.1 localhost"), |
| 96 | []byte("::1 localhost"), |
| 97 | } |
| 98 | for _, nid := range nodeIdsSorted { |
Mateusz Zalega | b30a41d | 2022-04-29 17:14:50 +0200 | [diff] [blame] | 99 | addr := m[nid].address |
Serge Bazanski | f73d8a9 | 2021-11-02 21:19:45 +0100 | [diff] [blame] | 100 | line := fmt.Sprintf("%s %s", addr, nid) |
| 101 | supervisor.Logger(ctx).Infof("Hosts entry: %s", line) |
| 102 | lines = append(lines, []byte(line)) |
| 103 | } |
| 104 | lines = append(lines, []byte("")) |
| 105 | |
| 106 | return bytes.Join(lines, []byte("\n")) |
| 107 | } |
| 108 | |
Mateusz Zalega | b30a41d | 2022-04-29 17:14:50 +0200 | [diff] [blame] | 109 | // clusterDirectory builds a ClusterDirectory based on nodeMap contents. If m |
| 110 | // is empty, an empty ClusterDirectory is returned. |
| 111 | func (m nodeMap) clusterDirectory(ctx context.Context) *cpb.ClusterDirectory { |
| 112 | var directory cpb.ClusterDirectory |
| 113 | for _, ni := range m { |
| 114 | // Skip local addresses. |
| 115 | if ni.local { |
| 116 | continue |
| 117 | } |
| 118 | |
| 119 | supervisor.Logger(ctx).Infof("ClusterDirectory entry: %s", ni.address) |
| 120 | addresses := []*cpb.ClusterDirectory_Node_Address{ |
| 121 | {Host: ni.address}, |
| 122 | } |
| 123 | node := &cpb.ClusterDirectory_Node{ |
| 124 | Addresses: addresses, |
| 125 | } |
| 126 | directory.Nodes = append(directory.Nodes, node) |
| 127 | } |
| 128 | return &directory |
| 129 | } |
| 130 | |
Serge Bazanski | f73d8a9 | 2021-11-02 21:19:45 +0100 | [diff] [blame] | 131 | func (s *Service) Run(ctx context.Context) error { |
| 132 | s.localC = make(chan string) |
| 133 | defer close(s.localC) |
| 134 | s.clusterC = make(chan nodeMap) |
| 135 | defer close(s.clusterC) |
| 136 | |
Serge Bazanski | 6dff6d6 | 2022-01-28 18:15:14 +0100 | [diff] [blame] | 137 | cmw := s.Roleserver.ClusterMembership.Watch() |
| 138 | defer cmw.Close() |
| 139 | supervisor.Logger(ctx).Infof("Waiting for node ID...") |
Serge Bazanski | 37110c3 | 2023-03-01 13:57:27 +0000 | [diff] [blame^] | 140 | nodeID, err := roleserve.GetNodeID(ctx, cmw) |
Serge Bazanski | 6dff6d6 | 2022-01-28 18:15:14 +0100 | [diff] [blame] | 141 | if err != nil { |
| 142 | return err |
| 143 | } |
| 144 | supervisor.Logger(ctx).Infof("Got node ID, starting...") |
Serge Bazanski | f73d8a9 | 2021-11-02 21:19:45 +0100 | [diff] [blame] | 145 | |
| 146 | if err := supervisor.Run(ctx, "local", s.runLocal); err != nil { |
| 147 | return err |
| 148 | } |
Serge Bazanski | 6dff6d6 | 2022-01-28 18:15:14 +0100 | [diff] [blame] | 149 | if err := supervisor.Run(ctx, "cluster", s.runCluster); err != nil { |
| 150 | return err |
Serge Bazanski | f73d8a9 | 2021-11-02 21:19:45 +0100 | [diff] [blame] | 151 | } |
| 152 | |
| 153 | // Immediately update machine-id and hostname, we don't need network addresses |
| 154 | // for that. |
| 155 | if err := s.Ephemeral.MachineID.Write([]byte(nodeID), 0644); err != nil { |
| 156 | return fmt.Errorf("failed to write /ephemeral/machine-id: %w", err) |
| 157 | } |
| 158 | if err := unix.Sethostname([]byte(nodeID)); err != nil { |
| 159 | return fmt.Errorf("failed to set runtime hostname: %w", err) |
| 160 | } |
| 161 | // Immediately write an /etc/hosts just containing localhost, even if we don't |
| 162 | // yet have a network address. |
| 163 | nodes := make(nodeMap) |
| 164 | if err := s.Ephemeral.Hosts.Write(nodes.hosts(ctx), 0644); err != nil { |
| 165 | return fmt.Errorf("failed to write %s: %w", s.Ephemeral.Hosts.FullPath(), err) |
| 166 | } |
| 167 | |
| 168 | supervisor.Signal(ctx, supervisor.SignalHealthy) |
| 169 | // Update nodeMap in a loop, issuing writes/updates when any change occurred. |
| 170 | for { |
| 171 | changed := false |
| 172 | select { |
| 173 | case <-ctx.Done(): |
| 174 | return ctx.Err() |
| 175 | case u := <-s.localC: |
| 176 | // Ignore spurious updates. |
Mateusz Zalega | b30a41d | 2022-04-29 17:14:50 +0200 | [diff] [blame] | 177 | if nodes[nodeID].address == u { |
Serge Bazanski | f73d8a9 | 2021-11-02 21:19:45 +0100 | [diff] [blame] | 178 | break |
| 179 | } |
| 180 | supervisor.Logger(ctx).Infof("Got new local address: %s", u) |
Mateusz Zalega | b30a41d | 2022-04-29 17:14:50 +0200 | [diff] [blame] | 181 | nodes[nodeID] = nodeInfo{ |
| 182 | address: u, |
| 183 | local: true, |
| 184 | } |
Serge Bazanski | f73d8a9 | 2021-11-02 21:19:45 +0100 | [diff] [blame] | 185 | changed = true |
| 186 | case u := <-s.clusterC: |
| 187 | // Loop through the nodeMap from the cluster subrunnable, making note of what |
| 188 | // changed. By design we don't care about any nodes disappearing from the |
| 189 | // nodeMap: we'd rather keep stale data about nodes that don't exist any more, |
| 190 | // as these might either be spurious or have a long tail of effectively still |
| 191 | // being used by the local node for communications while the node gets fully |
| 192 | // drained/disowned. |
| 193 | // |
| 194 | // MVP: we should at least log removed nodes. |
Mateusz Zalega | b30a41d | 2022-04-29 17:14:50 +0200 | [diff] [blame] | 195 | for id, info := range u { |
Serge Bazanski | f73d8a9 | 2021-11-02 21:19:45 +0100 | [diff] [blame] | 196 | // We're not interested in what the cluster thinks about our local node, as that |
| 197 | // might be outdated (eg. when we haven't yet reported a new local address to |
| 198 | // the cluster). |
| 199 | if id == nodeID { |
| 200 | continue |
| 201 | } |
Mateusz Zalega | b30a41d | 2022-04-29 17:14:50 +0200 | [diff] [blame] | 202 | if nodes[id].address == info.address { |
Serge Bazanski | f73d8a9 | 2021-11-02 21:19:45 +0100 | [diff] [blame] | 203 | continue |
| 204 | } |
Mateusz Zalega | b30a41d | 2022-04-29 17:14:50 +0200 | [diff] [blame] | 205 | supervisor.Logger(ctx).Infof("Got new cluster address: %s is %s", id, info.address) |
| 206 | nodes[id] = info |
Serge Bazanski | f73d8a9 | 2021-11-02 21:19:45 +0100 | [diff] [blame] | 207 | changed = true |
| 208 | } |
| 209 | } |
| 210 | |
| 211 | if !changed { |
| 212 | continue |
| 213 | } |
| 214 | |
| 215 | supervisor.Logger(ctx).Infof("Updating hosts file: %d nodes", len(nodes)) |
| 216 | if err := s.Ephemeral.Hosts.Write(nodes.hosts(ctx), 0644); err != nil { |
| 217 | return fmt.Errorf("failed to write %s: %w", s.Ephemeral.Hosts.FullPath(), err) |
| 218 | } |
| 219 | |
| 220 | // Check that we are self-resolvable. |
| 221 | if _, err := net.ResolveIPAddr("ip", nodeID); err != nil { |
| 222 | supervisor.Logger(ctx).Errorf("Failed to self-resolve %q: %v", nodeID, err) |
| 223 | } |
| 224 | |
Mateusz Zalega | b30a41d | 2022-04-29 17:14:50 +0200 | [diff] [blame] | 225 | // Update this node's ClusterDirectory. |
| 226 | supervisor.Logger(ctx).Info("Updating ClusterDirectory.") |
| 227 | cd := nodes.clusterDirectory(ctx) |
| 228 | cdirRaw, err := proto.Marshal(cd) |
| 229 | if err != nil { |
| 230 | return fmt.Errorf("couldn't marshal ClusterDirectory: %w", err) |
| 231 | } |
| 232 | if err = s.ESP.Metropolis.ClusterDirectory.Write(cdirRaw, 0644); err != nil { |
| 233 | return err |
| 234 | } |
| 235 | unix.Sync() |
Serge Bazanski | f73d8a9 | 2021-11-02 21:19:45 +0100 | [diff] [blame] | 236 | } |
| 237 | } |
| 238 | |
| 239 | // runLocal updates s.localC with the IP address of the local node, as retrieved |
| 240 | // from the network service. |
| 241 | func (s *Service) runLocal(ctx context.Context) error { |
| 242 | nw := s.Network.Watch() |
| 243 | for { |
| 244 | ns, err := nw.Get(ctx) |
| 245 | if err != nil { |
| 246 | return err |
| 247 | } |
| 248 | addr := ns.ExternalAddress.String() |
| 249 | if addr != "" { |
| 250 | s.localC <- addr |
| 251 | } |
| 252 | } |
| 253 | } |
| 254 | |
| 255 | // runCluster updates s.clusterC with the IP addresses of cluster nodes, as |
| 256 | // retrieved from a Curator client from the ClusterDialer. The returned map |
| 257 | // reflects the up-to-date view of the cluster returned from the Curator Watch |
| 258 | // call, including any node deletions. |
| 259 | func (s *Service) runCluster(ctx context.Context) error { |
Serge Bazanski | 6dff6d6 | 2022-01-28 18:15:14 +0100 | [diff] [blame] | 260 | cmw := s.Roleserver.ClusterMembership.Watch() |
| 261 | defer cmw.Close() |
Serge Bazanski | f73d8a9 | 2021-11-02 21:19:45 +0100 | [diff] [blame] | 262 | |
Serge Bazanski | 6dff6d6 | 2022-01-28 18:15:14 +0100 | [diff] [blame] | 263 | supervisor.Logger(ctx).Infof("Waiting for cluster membership...") |
Serge Bazanski | 37110c3 | 2023-03-01 13:57:27 +0000 | [diff] [blame^] | 264 | cm, err := cmw.Get(ctx, roleserve.FilterHome()) |
Serge Bazanski | 6dff6d6 | 2022-01-28 18:15:14 +0100 | [diff] [blame] | 265 | if err != nil { |
| 266 | return err |
| 267 | } |
| 268 | supervisor.Logger(ctx).Infof("Got cluster membership, starting...") |
| 269 | |
| 270 | con, err := cm.DialCurator() |
| 271 | if err != nil { |
| 272 | return err |
| 273 | } |
| 274 | defer con.Close() |
| 275 | cur := ipb.NewCuratorClient(con) |
| 276 | |
| 277 | w, err := cur.Watch(ctx, &ipb.WatchRequest{ |
Serge Bazanski | f73d8a9 | 2021-11-02 21:19:45 +0100 | [diff] [blame] | 278 | Kind: &ipb.WatchRequest_NodesInCluster_{ |
| 279 | NodesInCluster: &ipb.WatchRequest_NodesInCluster{}, |
| 280 | }, |
| 281 | }) |
| 282 | if err != nil { |
| 283 | return fmt.Errorf("curator watch failed: %w", err) |
| 284 | } |
| 285 | |
| 286 | nodes := make(nodeMap) |
| 287 | for { |
| 288 | ev, err := w.Recv() |
| 289 | if err != nil { |
| 290 | return fmt.Errorf("receive failed: %w", err) |
| 291 | } |
| 292 | for _, n := range ev.Nodes { |
| 293 | if n.Status == nil || n.Status.ExternalAddress == "" { |
| 294 | continue |
| 295 | } |
Mateusz Zalega | b30a41d | 2022-04-29 17:14:50 +0200 | [diff] [blame] | 296 | nodes[n.Id] = nodeInfo{ |
| 297 | address: n.Status.ExternalAddress, |
| 298 | local: false, |
| 299 | } |
Serge Bazanski | f73d8a9 | 2021-11-02 21:19:45 +0100 | [diff] [blame] | 300 | } |
| 301 | for _, t := range ev.NodeTombstones { |
| 302 | delete(nodes, t.NodeId) |
| 303 | } |
| 304 | s.clusterC <- nodes |
| 305 | } |
| 306 | } |