blob: a009c33a8186affa75c6934457eba39718986380 [file] [log] [blame]
Tim Windelschmidt6d33a432025-02-04 14:34:25 +01001// Copyright The Monogon Project Authors.
Serge Bazanski42e61c62021-03-18 15:07:18 +01002// SPDX-License-Identifier: Apache-2.0
Serge Bazanski42e61c62021-03-18 15:07:18 +01003
// Package cluster implements low-level clustering logic, especially logic
// regarding bootstrapping, registering into and joining a cluster. Its goal
6// is to provide the rest of the node code with the following:
7// - A mounted plaintext storage.
8// - Node credentials/identity.
9// - A locally running etcd server if the node is supposed to run one, and a
10// client connection to that etcd cluster if so.
11// - The state of the cluster as seen by the node, to enable code to respond to
12// node lifecycle changes.
Serge Bazanski42e61c62021-03-18 15:07:18 +010013package cluster
14
15import (
Serge Bazanskia959cbd2021-06-17 15:56:51 +020016 "context"
17 "errors"
Serge Bazanski42e61c62021-03-18 15:07:18 +010018 "fmt"
19
Serge Bazanskia959cbd2021-06-17 15:56:51 +020020 "source.monogon.dev/metropolis/node/core/localstorage"
21 "source.monogon.dev/metropolis/node/core/network"
Serge Bazanski6dff6d62022-01-28 18:15:14 +010022 "source.monogon.dev/metropolis/node/core/roleserve"
Lorenz Brun35fcf032023-06-29 04:15:58 +020023 "source.monogon.dev/metropolis/node/core/update"
Serge Bazanskia959cbd2021-06-17 15:56:51 +020024 apb "source.monogon.dev/metropolis/proto/api"
Mateusz Zalega2930e992022-04-25 12:52:35 +020025 cpb "source.monogon.dev/metropolis/proto/common"
Tim Windelschmidt9f21f532024-05-07 15:14:20 +020026 "source.monogon.dev/osbase/supervisor"
Serge Bazanski42e61c62021-03-18 15:07:18 +010027)
28
Serge Bazanskia959cbd2021-06-17 15:56:51 +020029type Manager struct {
30 storageRoot *localstorage.Root
31 networkService *network.Service
Serge Bazanski6dff6d62022-01-28 18:15:14 +010032 roleServer *roleserve.Service
Lorenz Brun35fcf032023-06-29 04:15:58 +020033 updateService *update.Service
Lorenz Brun85ad26a2023-03-27 17:00:00 +020034 nodeParams *apb.NodeParameters
Serge Bazanski5df62ba2023-03-22 17:56:46 +010035 haveTPM bool
Serge Bazanskia959cbd2021-06-17 15:56:51 +020036
Serge Bazanskife5192d2023-03-16 11:33:56 +010037 oneway chan struct{}
Serge Bazanskia959cbd2021-06-17 15:56:51 +020038}
39
40// NewManager creates a new cluster Manager. The given localstorage Root must
41// be places, but not yet started (and will be started as the Manager makes
42// progress). The given network Service must already be running.
Lorenz Brun35fcf032023-06-29 04:15:58 +020043func NewManager(storageRoot *localstorage.Root, networkService *network.Service, rs *roleserve.Service, updateService *update.Service, nodeParams *apb.NodeParameters, haveTPM bool) *Manager {
Serge Bazanskia959cbd2021-06-17 15:56:51 +020044 return &Manager{
45 storageRoot: storageRoot,
46 networkService: networkService,
Serge Bazanski6dff6d62022-01-28 18:15:14 +010047 roleServer: rs,
Lorenz Brun35fcf032023-06-29 04:15:58 +020048 updateService: updateService,
Lorenz Brun85ad26a2023-03-27 17:00:00 +020049 nodeParams: nodeParams,
Serge Bazanski5df62ba2023-03-22 17:56:46 +010050 haveTPM: haveTPM,
Serge Bazanskife5192d2023-03-16 11:33:56 +010051 oneway: make(chan struct{}),
Serge Bazanskia959cbd2021-06-17 15:56:51 +020052 }
53}
54
Serge Bazanskia959cbd2021-06-17 15:56:51 +020055// Run is the runnable of the Manager, to be started using the Supervisor. It
56// is one-shot, and should not be restarted.
57func (m *Manager) Run(ctx context.Context) error {
Serge Bazanskife5192d2023-03-16 11:33:56 +010058 select {
59 case <-m.oneway:
Serge Bazanskia959cbd2021-06-17 15:56:51 +020060 return fmt.Errorf("cannot restart cluster manager")
Serge Bazanskife5192d2023-03-16 11:33:56 +010061 default:
Serge Bazanskia959cbd2021-06-17 15:56:51 +020062 }
Serge Bazanskife5192d2023-03-16 11:33:56 +010063 close(m.oneway)
Serge Bazanskia959cbd2021-06-17 15:56:51 +020064
Serge Bazanskie4a4ce12023-03-22 18:29:54 +010065 // Try sealed configuration first.
Tim Windelschmidta10d0cb2025-01-13 14:44:15 +010066 configuration, err := m.storageRoot.ESP.Metropolis.SealedConfiguration.Unseal(cpb.NodeTPMUsage_NODE_TPM_USAGE_PRESENT_AND_USED)
Serge Bazanskia959cbd2021-06-17 15:56:51 +020067 if err == nil {
68 supervisor.Logger(ctx).Info("Sealed configuration present. attempting to join cluster")
Mateusz Zalega2930e992022-04-25 12:52:35 +020069
70 // Read Cluster Directory and unmarshal it. Since the node is already
71 // registered with the cluster, the directory won't be bootstrapped from
72 // Node Parameters.
73 cd, err := m.storageRoot.ESP.Metropolis.ClusterDirectory.Unmarshal()
74 if err != nil {
75 return fmt.Errorf("while reading cluster directory: %w", err)
76 }
Serge Bazanskie4a4ce12023-03-22 18:29:54 +010077 return m.join(ctx, configuration, cd, true)
Serge Bazanskia959cbd2021-06-17 15:56:51 +020078 }
79
Serge Bazanski98054a12023-06-14 18:16:21 +020080 if !errors.Is(err, localstorage.ErrNoSealed) && !errors.Is(err, localstorage.ErrSealedCorrupted) {
Serge Bazanskia959cbd2021-06-17 15:56:51 +020081 return fmt.Errorf("unexpected sealed config error: %w", err)
82 }
83
Tim Windelschmidta10d0cb2025-01-13 14:44:15 +010084 configuration, err = m.storageRoot.ESP.Metropolis.SealedConfiguration.Unseal(cpb.NodeTPMUsage_NODE_TPM_USAGE_NOT_PRESENT)
Serge Bazanskie4a4ce12023-03-22 18:29:54 +010085 if err == nil {
86 supervisor.Logger(ctx).Info("Non-sealed configuration present. attempting to join cluster")
87
88 // Read Cluster Directory and unmarshal it. Since the node is already
89 // registered with the cluster, the directory won't be bootstrapped from
90 // Node Parameters.
91 cd, err := m.storageRoot.ESP.Metropolis.ClusterDirectory.Unmarshal()
92 if err != nil {
93 return fmt.Errorf("while reading cluster directory: %w", err)
94 }
95 return m.join(ctx, configuration, cd, false)
96 }
97
Serge Bazanskia959cbd2021-06-17 15:56:51 +020098 supervisor.Logger(ctx).Info("No sealed configuration, looking for node parameters")
99
Lorenz Brun85ad26a2023-03-27 17:00:00 +0200100 switch inner := m.nodeParams.Cluster.(type) {
Serge Bazanskia959cbd2021-06-17 15:56:51 +0200101 case *apb.NodeParameters_ClusterBootstrap_:
Serge Bazanski5839e972021-11-16 15:46:19 +0100102 err = m.bootstrap(ctx, inner.ClusterBootstrap)
Serge Bazanskia959cbd2021-06-17 15:56:51 +0200103 case *apb.NodeParameters_ClusterRegister_:
Serge Bazanski5839e972021-11-16 15:46:19 +0100104 err = m.register(ctx, inner.ClusterRegister)
Serge Bazanskia959cbd2021-06-17 15:56:51 +0200105 default:
Serge Bazanski5839e972021-11-16 15:46:19 +0100106 err = fmt.Errorf("node parameters misconfigured: neither cluster_bootstrap nor cluster_register set")
Serge Bazanskia959cbd2021-06-17 15:56:51 +0200107 }
Serge Bazanski5839e972021-11-16 15:46:19 +0100108
109 if err == nil {
110 supervisor.Logger(ctx).Info("Cluster enrolment done.")
Serge Bazanskife5192d2023-03-16 11:33:56 +0100111 return nil
Serge Bazanski5839e972021-11-16 15:46:19 +0100112 }
113 return err
Serge Bazanskia959cbd2021-06-17 15:56:51 +0200114}