blob: 529120bd8d5b0529fa3f3a710eb84fb7a3d0c225 [file] [log] [blame]
Serge Bazanski42e61c62021-03-18 15:07:18 +01001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
// Package cluster implements low-level clustering logic, especially logic
// regarding bootstrapping, registering into and joining a cluster. Its goal
// is to provide the rest of the node code with the following:
//   - A mounted plaintext storage.
//   - Node credentials/identity.
//   - A locally running etcd server if the node is supposed to run one, and a
//     client connection to that etcd cluster if so.
//   - The state of the cluster as seen by the node, to enable code to respond to
//     node lifecycle changes.
Serge Bazanski42e61c62021-03-18 15:07:18 +010026package cluster
27
28import (
Serge Bazanskia959cbd2021-06-17 15:56:51 +020029 "context"
30 "errors"
Serge Bazanski42e61c62021-03-18 15:07:18 +010031 "fmt"
Mateusz Zalega2930e992022-04-25 12:52:35 +020032 "strings"
Serge Bazanski42e61c62021-03-18 15:07:18 +010033
Serge Bazanskia959cbd2021-06-17 15:56:51 +020034 "source.monogon.dev/metropolis/node/core/localstorage"
35 "source.monogon.dev/metropolis/node/core/network"
Serge Bazanski6dff6d62022-01-28 18:15:14 +010036 "source.monogon.dev/metropolis/node/core/roleserve"
Serge Bazanskia959cbd2021-06-17 15:56:51 +020037 "source.monogon.dev/metropolis/pkg/supervisor"
38 apb "source.monogon.dev/metropolis/proto/api"
Mateusz Zalega2930e992022-04-25 12:52:35 +020039 cpb "source.monogon.dev/metropolis/proto/common"
Serge Bazanski42e61c62021-03-18 15:07:18 +010040)
41
Serge Bazanskia959cbd2021-06-17 15:56:51 +020042type Manager struct {
43 storageRoot *localstorage.Root
44 networkService *network.Service
Serge Bazanski6dff6d62022-01-28 18:15:14 +010045 roleServer *roleserve.Service
Lorenz Brun85ad26a2023-03-27 17:00:00 +020046 nodeParams *apb.NodeParameters
Serge Bazanski5df62ba2023-03-22 17:56:46 +010047 haveTPM bool
Serge Bazanskia959cbd2021-06-17 15:56:51 +020048
Serge Bazanskife5192d2023-03-16 11:33:56 +010049 oneway chan struct{}
Serge Bazanskia959cbd2021-06-17 15:56:51 +020050}
51
52// NewManager creates a new cluster Manager. The given localstorage Root must
53// be places, but not yet started (and will be started as the Manager makes
54// progress). The given network Service must already be running.
Serge Bazanski5df62ba2023-03-22 17:56:46 +010055func NewManager(storageRoot *localstorage.Root, networkService *network.Service, rs *roleserve.Service, nodeParams *apb.NodeParameters, haveTPM bool) *Manager {
Serge Bazanskia959cbd2021-06-17 15:56:51 +020056 return &Manager{
57 storageRoot: storageRoot,
58 networkService: networkService,
Serge Bazanski6dff6d62022-01-28 18:15:14 +010059 roleServer: rs,
Lorenz Brun85ad26a2023-03-27 17:00:00 +020060 nodeParams: nodeParams,
Serge Bazanski5df62ba2023-03-22 17:56:46 +010061 haveTPM: haveTPM,
Serge Bazanskife5192d2023-03-16 11:33:56 +010062 oneway: make(chan struct{}),
Serge Bazanskia959cbd2021-06-17 15:56:51 +020063 }
64}
65
Serge Bazanskia959cbd2021-06-17 15:56:51 +020066// Run is the runnable of the Manager, to be started using the Supervisor. It
67// is one-shot, and should not be restarted.
68func (m *Manager) Run(ctx context.Context) error {
Serge Bazanskife5192d2023-03-16 11:33:56 +010069 select {
70 case <-m.oneway:
Serge Bazanskia959cbd2021-06-17 15:56:51 +020071 return fmt.Errorf("cannot restart cluster manager")
Serge Bazanskife5192d2023-03-16 11:33:56 +010072 default:
Serge Bazanskia959cbd2021-06-17 15:56:51 +020073 }
Serge Bazanskife5192d2023-03-16 11:33:56 +010074 close(m.oneway)
Serge Bazanskia959cbd2021-06-17 15:56:51 +020075
Serge Bazanskie4a4ce12023-03-22 18:29:54 +010076 // Try sealed configuration first.
Lorenz Brun6c35e972021-12-14 03:08:23 +010077 configuration, err := m.storageRoot.ESP.Metropolis.SealedConfiguration.Unseal()
Serge Bazanskia959cbd2021-06-17 15:56:51 +020078 if err == nil {
79 supervisor.Logger(ctx).Info("Sealed configuration present. attempting to join cluster")
Mateusz Zalega2930e992022-04-25 12:52:35 +020080
81 // Read Cluster Directory and unmarshal it. Since the node is already
82 // registered with the cluster, the directory won't be bootstrapped from
83 // Node Parameters.
84 cd, err := m.storageRoot.ESP.Metropolis.ClusterDirectory.Unmarshal()
85 if err != nil {
86 return fmt.Errorf("while reading cluster directory: %w", err)
87 }
Serge Bazanskie4a4ce12023-03-22 18:29:54 +010088 return m.join(ctx, configuration, cd, true)
Serge Bazanskia959cbd2021-06-17 15:56:51 +020089 }
90
91 if !errors.Is(err, localstorage.ErrNoSealed) {
92 return fmt.Errorf("unexpected sealed config error: %w", err)
93 }
94
Serge Bazanskie4a4ce12023-03-22 18:29:54 +010095 configuration, err = m.storageRoot.ESP.Metropolis.SealedConfiguration.ReadUnsafe()
96 if err == nil {
97 supervisor.Logger(ctx).Info("Non-sealed configuration present. attempting to join cluster")
98
99 // Read Cluster Directory and unmarshal it. Since the node is already
100 // registered with the cluster, the directory won't be bootstrapped from
101 // Node Parameters.
102 cd, err := m.storageRoot.ESP.Metropolis.ClusterDirectory.Unmarshal()
103 if err != nil {
104 return fmt.Errorf("while reading cluster directory: %w", err)
105 }
106 return m.join(ctx, configuration, cd, false)
107 }
108
Serge Bazanskia959cbd2021-06-17 15:56:51 +0200109 supervisor.Logger(ctx).Info("No sealed configuration, looking for node parameters")
110
Lorenz Brun85ad26a2023-03-27 17:00:00 +0200111 switch inner := m.nodeParams.Cluster.(type) {
Serge Bazanskia959cbd2021-06-17 15:56:51 +0200112 case *apb.NodeParameters_ClusterBootstrap_:
Serge Bazanski5839e972021-11-16 15:46:19 +0100113 err = m.bootstrap(ctx, inner.ClusterBootstrap)
Serge Bazanskia959cbd2021-06-17 15:56:51 +0200114 case *apb.NodeParameters_ClusterRegister_:
Serge Bazanski5839e972021-11-16 15:46:19 +0100115 err = m.register(ctx, inner.ClusterRegister)
Serge Bazanskia959cbd2021-06-17 15:56:51 +0200116 default:
Serge Bazanski5839e972021-11-16 15:46:19 +0100117 err = fmt.Errorf("node parameters misconfigured: neither cluster_bootstrap nor cluster_register set")
Serge Bazanskia959cbd2021-06-17 15:56:51 +0200118 }
Serge Bazanski5839e972021-11-16 15:46:19 +0100119
120 if err == nil {
121 supervisor.Logger(ctx).Info("Cluster enrolment done.")
Serge Bazanskife5192d2023-03-16 11:33:56 +0100122 return nil
Serge Bazanski5839e972021-11-16 15:46:19 +0100123 }
124 return err
Serge Bazanskia959cbd2021-06-17 15:56:51 +0200125}
126
Mateusz Zalega2930e992022-04-25 12:52:35 +0200127// logClusterDirectory verbosely logs the whole Cluster Directory passed to it.
128func logClusterDirectory(ctx context.Context, cd *cpb.ClusterDirectory) {
129 for _, node := range cd.Nodes {
Mateusz Zalega2930e992022-04-25 12:52:35 +0200130 var addresses []string
131 for _, add := range node.Addresses {
132 addresses = append(addresses, add.Host)
133 }
Mateusz Zalegade821502022-04-29 16:37:17 +0200134 supervisor.Logger(ctx).Infof(" Addresses: %s", strings.Join(addresses, ","))
Mateusz Zalega2930e992022-04-25 12:52:35 +0200135 }
136}