blob: a4455ecc3cad3dfc899768e37e184e87fb8d8f53 [file] [log] [blame]
// hostsfile implements a service which owns and writes all node-local
// files/interfaces used by the system to resolve the local node's name and the
// names of other nodes in the cluster:
//
//  1. All cluster node names are written into /etc/hosts for DNS resolution.
//  2. The local node's name is written into /etc/machine-id.
//  3. The local node's name is set as the UNIX hostname of the machine (via the
//     sethostname call).
//  4. The local node's ClusterDirectory is updated with the addresses of all
//     non-local nodes that are also present in /etc/hosts.
//
// The hostsfile Service can start up in two modes: with cluster connectivity
// and without cluster connectivity. Without cluster connectivity, only
// information about the current node (as retrieved from the network service)
// will be used to populate local data. In cluster mode, information about other
// nodes is also used.
17package hostsfile
18
19import (
20 "bytes"
21 "context"
Serge Bazanskif73d8a92021-11-02 21:19:45 +010022 "fmt"
23 "net"
24 "sort"
25
26 "golang.org/x/sys/unix"
27 "google.golang.org/grpc"
Mateusz Zalegab30a41d2022-04-29 17:14:50 +020028 "google.golang.org/protobuf/proto"
Serge Bazanskif73d8a92021-11-02 21:19:45 +010029
30 ipb "source.monogon.dev/metropolis/node/core/curator/proto/api"
Serge Bazanskif73d8a92021-11-02 21:19:45 +010031 "source.monogon.dev/metropolis/node/core/localstorage"
32 "source.monogon.dev/metropolis/node/core/network"
Serge Bazanski6dff6d62022-01-28 18:15:14 +010033 "source.monogon.dev/metropolis/node/core/roleserve"
Serge Bazanskif73d8a92021-11-02 21:19:45 +010034 "source.monogon.dev/metropolis/pkg/supervisor"
Mateusz Zalegab30a41d2022-04-29 17:14:50 +020035 cpb "source.monogon.dev/metropolis/proto/common"
Serge Bazanskif73d8a92021-11-02 21:19:45 +010036)
37
38type Config struct {
Serge Bazanskif73d8a92021-11-02 21:19:45 +010039 // Network is a handle to the Network service, used to update the hostsfile
40 // service with information about the local node's external IP address.
41 Network *network.Service
42 // Ephemeral is the root of the ephemeral storage of the node, into which the
43 // service will write its managed files.
44 Ephemeral *localstorage.EphemeralDirectory
Mateusz Zalegab30a41d2022-04-29 17:14:50 +020045 // ESP is the root of the node's EFI System Partition.
46 ESP *localstorage.ESPDirectory
Serge Bazanski6dff6d62022-01-28 18:15:14 +010047
48 // Roleserver is an instance of the roleserver service which will be queried for
49 // ClusterMembership and a Curator client.
50 Roleserver *roleserve.Service
Serge Bazanskif73d8a92021-11-02 21:19:45 +010051}
52
53// Service is the hostsfile service instance. See package-level documentation
54// for more information.
55type Service struct {
56 Config
57
58 // localC is a channel populated by the local sub-runnable with the newest
59 // available information about the local node's address. It is automatically
60 // created and closed by Run.
61 localC chan string
62 // clusterC is a channel populated by the cluster sub-runnable with the newest
63 // available information about the cluster nodes. It is automatically created and
64 // closed by Run.
65 clusterC chan nodeMap
66}
67
68type ClusterDialer func(ctx context.Context) (*grpc.ClientConn, error)
69
// nodeInfo contains all of a single node's data needed to build its entry in
// either hostsfile or ClusterDirectory.
type nodeInfo struct {
	// address is the node's IP address, in string form.
	address string
	// local is true if address belongs to the local node. Local entries are
	// written to the hosts file but skipped when building a ClusterDirectory.
	local bool
}
78
Serge Bazanskif73d8a92021-11-02 21:19:45 +010079// nodeMap is a map from node ID (effectively DNS name) to node IP address.
Mateusz Zalegab30a41d2022-04-29 17:14:50 +020080type nodeMap map[string]nodeInfo
Serge Bazanskif73d8a92021-11-02 21:19:45 +010081
82// hosts generates a complete /etc/hosts file based on the contents of the
83// nodeMap. Apart from the addresses in the nodeMap, entries for localhost
84// pointing to 127.0.0.1 and ::1 will also be generated.
85func (m nodeMap) hosts(ctx context.Context) []byte {
86 var nodeIdsSorted []string
87 for k, _ := range m {
88 nodeIdsSorted = append(nodeIdsSorted, k)
89 }
90 sort.Slice(nodeIdsSorted, func(i, j int) bool {
91 return nodeIdsSorted[i] < nodeIdsSorted[j]
92 })
93
94 lines := [][]byte{
95 []byte("127.0.0.1 localhost"),
96 []byte("::1 localhost"),
97 }
98 for _, nid := range nodeIdsSorted {
Mateusz Zalegab30a41d2022-04-29 17:14:50 +020099 addr := m[nid].address
Serge Bazanskif73d8a92021-11-02 21:19:45 +0100100 line := fmt.Sprintf("%s %s", addr, nid)
101 supervisor.Logger(ctx).Infof("Hosts entry: %s", line)
102 lines = append(lines, []byte(line))
103 }
104 lines = append(lines, []byte(""))
105
106 return bytes.Join(lines, []byte("\n"))
107}
108
Mateusz Zalegab30a41d2022-04-29 17:14:50 +0200109// clusterDirectory builds a ClusterDirectory based on nodeMap contents. If m
110// is empty, an empty ClusterDirectory is returned.
111func (m nodeMap) clusterDirectory(ctx context.Context) *cpb.ClusterDirectory {
112 var directory cpb.ClusterDirectory
113 for _, ni := range m {
114 // Skip local addresses.
115 if ni.local {
116 continue
117 }
118
119 supervisor.Logger(ctx).Infof("ClusterDirectory entry: %s", ni.address)
120 addresses := []*cpb.ClusterDirectory_Node_Address{
121 {Host: ni.address},
122 }
123 node := &cpb.ClusterDirectory_Node{
124 Addresses: addresses,
125 }
126 directory.Nodes = append(directory.Nodes, node)
127 }
128 return &directory
129}
130
Serge Bazanskif73d8a92021-11-02 21:19:45 +0100131func (s *Service) Run(ctx context.Context) error {
132 s.localC = make(chan string)
133 defer close(s.localC)
134 s.clusterC = make(chan nodeMap)
135 defer close(s.clusterC)
136
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100137 cmw := s.Roleserver.ClusterMembership.Watch()
138 defer cmw.Close()
139 supervisor.Logger(ctx).Infof("Waiting for node ID...")
Serge Bazanski37110c32023-03-01 13:57:27 +0000140 nodeID, err := roleserve.GetNodeID(ctx, cmw)
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100141 if err != nil {
142 return err
143 }
144 supervisor.Logger(ctx).Infof("Got node ID, starting...")
Serge Bazanskif73d8a92021-11-02 21:19:45 +0100145
146 if err := supervisor.Run(ctx, "local", s.runLocal); err != nil {
147 return err
148 }
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100149 if err := supervisor.Run(ctx, "cluster", s.runCluster); err != nil {
150 return err
Serge Bazanskif73d8a92021-11-02 21:19:45 +0100151 }
152
153 // Immediately update machine-id and hostname, we don't need network addresses
154 // for that.
155 if err := s.Ephemeral.MachineID.Write([]byte(nodeID), 0644); err != nil {
156 return fmt.Errorf("failed to write /ephemeral/machine-id: %w", err)
157 }
158 if err := unix.Sethostname([]byte(nodeID)); err != nil {
159 return fmt.Errorf("failed to set runtime hostname: %w", err)
160 }
161 // Immediately write an /etc/hosts just containing localhost, even if we don't
162 // yet have a network address.
163 nodes := make(nodeMap)
164 if err := s.Ephemeral.Hosts.Write(nodes.hosts(ctx), 0644); err != nil {
165 return fmt.Errorf("failed to write %s: %w", s.Ephemeral.Hosts.FullPath(), err)
166 }
167
168 supervisor.Signal(ctx, supervisor.SignalHealthy)
169 // Update nodeMap in a loop, issuing writes/updates when any change occurred.
170 for {
171 changed := false
172 select {
173 case <-ctx.Done():
174 return ctx.Err()
175 case u := <-s.localC:
176 // Ignore spurious updates.
Mateusz Zalegab30a41d2022-04-29 17:14:50 +0200177 if nodes[nodeID].address == u {
Serge Bazanskif73d8a92021-11-02 21:19:45 +0100178 break
179 }
180 supervisor.Logger(ctx).Infof("Got new local address: %s", u)
Mateusz Zalegab30a41d2022-04-29 17:14:50 +0200181 nodes[nodeID] = nodeInfo{
182 address: u,
183 local: true,
184 }
Serge Bazanskif73d8a92021-11-02 21:19:45 +0100185 changed = true
186 case u := <-s.clusterC:
187 // Loop through the nodeMap from the cluster subrunnable, making note of what
188 // changed. By design we don't care about any nodes disappearing from the
189 // nodeMap: we'd rather keep stale data about nodes that don't exist any more,
190 // as these might either be spurious or have a long tail of effectively still
191 // being used by the local node for communications while the node gets fully
192 // drained/disowned.
193 //
194 // MVP: we should at least log removed nodes.
Mateusz Zalegab30a41d2022-04-29 17:14:50 +0200195 for id, info := range u {
Serge Bazanskif73d8a92021-11-02 21:19:45 +0100196 // We're not interested in what the cluster thinks about our local node, as that
197 // might be outdated (eg. when we haven't yet reported a new local address to
198 // the cluster).
199 if id == nodeID {
200 continue
201 }
Mateusz Zalegab30a41d2022-04-29 17:14:50 +0200202 if nodes[id].address == info.address {
Serge Bazanskif73d8a92021-11-02 21:19:45 +0100203 continue
204 }
Mateusz Zalegab30a41d2022-04-29 17:14:50 +0200205 supervisor.Logger(ctx).Infof("Got new cluster address: %s is %s", id, info.address)
206 nodes[id] = info
Serge Bazanskif73d8a92021-11-02 21:19:45 +0100207 changed = true
208 }
209 }
210
211 if !changed {
212 continue
213 }
214
215 supervisor.Logger(ctx).Infof("Updating hosts file: %d nodes", len(nodes))
216 if err := s.Ephemeral.Hosts.Write(nodes.hosts(ctx), 0644); err != nil {
217 return fmt.Errorf("failed to write %s: %w", s.Ephemeral.Hosts.FullPath(), err)
218 }
219
220 // Check that we are self-resolvable.
221 if _, err := net.ResolveIPAddr("ip", nodeID); err != nil {
222 supervisor.Logger(ctx).Errorf("Failed to self-resolve %q: %v", nodeID, err)
223 }
224
Mateusz Zalegab30a41d2022-04-29 17:14:50 +0200225 // Update this node's ClusterDirectory.
226 supervisor.Logger(ctx).Info("Updating ClusterDirectory.")
227 cd := nodes.clusterDirectory(ctx)
228 cdirRaw, err := proto.Marshal(cd)
229 if err != nil {
230 return fmt.Errorf("couldn't marshal ClusterDirectory: %w", err)
231 }
232 if err = s.ESP.Metropolis.ClusterDirectory.Write(cdirRaw, 0644); err != nil {
233 return err
234 }
235 unix.Sync()
Serge Bazanskif73d8a92021-11-02 21:19:45 +0100236 }
237}
238
239// runLocal updates s.localC with the IP address of the local node, as retrieved
240// from the network service.
241func (s *Service) runLocal(ctx context.Context) error {
242 nw := s.Network.Watch()
243 for {
244 ns, err := nw.Get(ctx)
245 if err != nil {
246 return err
247 }
248 addr := ns.ExternalAddress.String()
249 if addr != "" {
250 s.localC <- addr
251 }
252 }
253}
254
255// runCluster updates s.clusterC with the IP addresses of cluster nodes, as
256// retrieved from a Curator client from the ClusterDialer. The returned map
257// reflects the up-to-date view of the cluster returned from the Curator Watch
258// call, including any node deletions.
259func (s *Service) runCluster(ctx context.Context) error {
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100260 cmw := s.Roleserver.ClusterMembership.Watch()
261 defer cmw.Close()
Serge Bazanskif73d8a92021-11-02 21:19:45 +0100262
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100263 supervisor.Logger(ctx).Infof("Waiting for cluster membership...")
Serge Bazanski37110c32023-03-01 13:57:27 +0000264 cm, err := cmw.Get(ctx, roleserve.FilterHome())
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100265 if err != nil {
266 return err
267 }
268 supervisor.Logger(ctx).Infof("Got cluster membership, starting...")
269
270 con, err := cm.DialCurator()
271 if err != nil {
272 return err
273 }
274 defer con.Close()
275 cur := ipb.NewCuratorClient(con)
276
277 w, err := cur.Watch(ctx, &ipb.WatchRequest{
Serge Bazanskif73d8a92021-11-02 21:19:45 +0100278 Kind: &ipb.WatchRequest_NodesInCluster_{
279 NodesInCluster: &ipb.WatchRequest_NodesInCluster{},
280 },
281 })
282 if err != nil {
283 return fmt.Errorf("curator watch failed: %w", err)
284 }
285
286 nodes := make(nodeMap)
287 for {
288 ev, err := w.Recv()
289 if err != nil {
290 return fmt.Errorf("receive failed: %w", err)
291 }
292 for _, n := range ev.Nodes {
293 if n.Status == nil || n.Status.ExternalAddress == "" {
294 continue
295 }
Mateusz Zalegab30a41d2022-04-29 17:14:50 +0200296 nodes[n.Id] = nodeInfo{
297 address: n.Status.ExternalAddress,
298 local: false,
299 }
Serge Bazanskif73d8a92021-11-02 21:19:45 +0100300 }
301 for _, t := range ev.NodeTombstones {
302 delete(nodes, t.NodeId)
303 }
304 s.clusterC <- nodes
305 }
306}