blob: aa293b0305c8e043f45794fe2b8bfd8a55587263 [file] [log] [blame]
Serge Bazanski42e61c62021-03-18 15:07:18 +01001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
// Package cluster implements low-level clustering logic, especially logic
// regarding bootstrapping, registering into and joining a cluster. Its goal is
// to provide the rest of the node code with the following:
//   - A mounted plaintext storage.
//   - Node credentials/identity.
//   - A locally running etcd server if the node is supposed to run one, and a
//     client connection to that etcd cluster if so.
//   - The state of the cluster as seen by the node, to enable code to respond
//     to node lifecycle changes.
Serge Bazanski42e61c62021-03-18 15:07:18 +010026package cluster
27
28import (
Serge Bazanskia959cbd2021-06-17 15:56:51 +020029 "context"
30 "errors"
Serge Bazanski42e61c62021-03-18 15:07:18 +010031 "fmt"
Serge Bazanskia959cbd2021-06-17 15:56:51 +020032 "io/ioutil"
33 "sync"
Serge Bazanski42e61c62021-03-18 15:07:18 +010034
Serge Bazanskia959cbd2021-06-17 15:56:51 +020035 "google.golang.org/protobuf/proto"
36
37 "source.monogon.dev/metropolis/node/core/consensus"
38 "source.monogon.dev/metropolis/node/core/localstorage"
39 "source.monogon.dev/metropolis/node/core/network"
40 "source.monogon.dev/metropolis/pkg/event/memory"
41 "source.monogon.dev/metropolis/pkg/supervisor"
42 apb "source.monogon.dev/metropolis/proto/api"
43 ppb "source.monogon.dev/metropolis/proto/private"
Serge Bazanski42e61c62021-03-18 15:07:18 +010044)
45
Serge Bazanskia959cbd2021-06-17 15:56:51 +020046type state struct {
47 mu sync.RWMutex
Serge Bazanski42e61c62021-03-18 15:07:18 +010048
Serge Bazanskia959cbd2021-06-17 15:56:51 +020049 oneway bool
Serge Bazanski42e61c62021-03-18 15:07:18 +010050
Serge Bazanskia959cbd2021-06-17 15:56:51 +020051 configuration *ppb.SealedConfiguration
Serge Bazanski42e61c62021-03-18 15:07:18 +010052}
53
Serge Bazanskia959cbd2021-06-17 15:56:51 +020054type Manager struct {
55 storageRoot *localstorage.Root
56 networkService *network.Service
57 status memory.Value
58
59 state
60
61 // consensus is the spawned etcd/consensus service, if the Manager brought
62 // up a Node that should run one.
63 consensus *consensus.Service
64}
65
66// NewManager creates a new cluster Manager. The given localstorage Root must
67// be places, but not yet started (and will be started as the Manager makes
68// progress). The given network Service must already be running.
69func NewManager(storageRoot *localstorage.Root, networkService *network.Service) *Manager {
70 return &Manager{
71 storageRoot: storageRoot,
72 networkService: networkService,
73
74 state: state{},
75 }
76}
77
78func (m *Manager) lock() (*state, func()) {
79 m.mu.Lock()
80 return &m.state, m.mu.Unlock
81}
82
83func (m *Manager) rlock() (*state, func()) {
84 m.mu.RLock()
85 return &m.state, m.mu.RUnlock
86}
87
88// Run is the runnable of the Manager, to be started using the Supervisor. It
89// is one-shot, and should not be restarted.
90func (m *Manager) Run(ctx context.Context) error {
91 state, unlock := m.lock()
92 if state.oneway {
93 unlock()
94 // TODO(q3k): restart the entire system if this happens
95 return fmt.Errorf("cannot restart cluster manager")
96 }
97 state.oneway = true
98 unlock()
99
100 configuration, err := m.storageRoot.ESP.SealedConfiguration.Unseal()
101 if err == nil {
102 supervisor.Logger(ctx).Info("Sealed configuration present. attempting to join cluster")
103 return m.join(ctx, configuration)
104 }
105
106 if !errors.Is(err, localstorage.ErrNoSealed) {
107 return fmt.Errorf("unexpected sealed config error: %w", err)
108 }
109
110 supervisor.Logger(ctx).Info("No sealed configuration, looking for node parameters")
111
112 params, err := m.nodeParams(ctx)
113 if err != nil {
114 return fmt.Errorf("no parameters available: %w", err)
115 }
116
117 switch inner := params.Cluster.(type) {
118 case *apb.NodeParameters_ClusterBootstrap_:
119 return m.bootstrap(ctx, inner.ClusterBootstrap)
120 case *apb.NodeParameters_ClusterRegister_:
121 return m.register(ctx, inner.ClusterRegister)
122 default:
123 return fmt.Errorf("node parameters misconfigured: neither cluster_bootstrap nor cluster_register set")
124 }
125}
126
127func (m *Manager) register(ctx context.Context, bootstrap *apb.NodeParameters_ClusterRegister) error {
128 return fmt.Errorf("unimplemented")
129}
130
131func (m *Manager) nodeParamsFWCFG(ctx context.Context) (*apb.NodeParameters, error) {
132 bytes, err := ioutil.ReadFile("/sys/firmware/qemu_fw_cfg/by_name/dev.monogon.metropolis/parameters.pb/raw")
133 if err != nil {
134 return nil, fmt.Errorf("could not read firmware enrolment file: %w", err)
135 }
136
137 config := apb.NodeParameters{}
138 err = proto.Unmarshal(bytes, &config)
139 if err != nil {
140 return nil, fmt.Errorf("could not unmarshal: %v", err)
141 }
142
143 return &config, nil
144}
145
146func (m *Manager) nodeParams(ctx context.Context) (*apb.NodeParameters, error) {
147 // Retrieve node parameters from qemu's fwcfg interface or ESP.
148 // TODO(q3k): probably abstract this away and implement per platform/build/...
149 paramsFWCFG, err := m.nodeParamsFWCFG(ctx)
150 if err != nil {
151 supervisor.Logger(ctx).Warningf("Could not retrieve node parameters from qemu fwcfg: %v", err)
152 paramsFWCFG = nil
153 } else {
154 supervisor.Logger(ctx).Infof("Retrieved node parameters from qemu fwcfg")
155 }
156 paramsESP, err := m.storageRoot.ESP.NodeParameters.Unmarshal()
157 if err != nil {
158 supervisor.Logger(ctx).Warningf("Could not retrieve node parameters from ESP: %v", err)
159 paramsESP = nil
160 } else {
161 supervisor.Logger(ctx).Infof("Retrieved node parameters from ESP")
162 }
163 if paramsFWCFG == nil && paramsESP == nil {
164 return nil, fmt.Errorf("could not find node parameters in ESP or qemu fwcfg")
165 }
166 if paramsFWCFG != nil && paramsESP != nil {
167 supervisor.Logger(ctx).Warningf("Node parameters found both in both ESP and qemu fwcfg, using the latter")
168 return paramsFWCFG, nil
169 } else if paramsFWCFG != nil {
170 return paramsFWCFG, nil
171 } else {
172 return paramsESP, nil
173 }
174}
175
176func (m *Manager) join(ctx context.Context, cfg *ppb.SealedConfiguration) error {
177 return fmt.Errorf("unimplemented")
178}