blob: 23076a42d5c2a70ee3a0aac6ba920959e6efe638 [file] [log] [blame]
Serge Bazanski0d937772021-06-17 15:54:40 +02001// Package roleserve implements the roleserver/“Role Server”.
2//
Serge Bazanski6dff6d62022-01-28 18:15:14 +01003// The Role Server runs on every node and is responsible for running all of the
4// node's role dependent services, like the control plane (Consensus/etcd and
5// Curator) and Kubernetes. It watches the node roles as assigned by the
6// cluster's curator, updates the status of the node within the curator, and
7// spawns on-demand services.
Serge Bazanski0d937772021-06-17 15:54:40 +02008//
Serge Bazanski0d937772021-06-17 15:54:40 +02009//
Serge Bazanski6dff6d62022-01-28 18:15:14 +010010//   .-----------.          .--------.  Watches  .------------.
11//   |  Cluster  |--------->|  Role  |<----------| Node Roles |
12//   | Enrolment | Provides | Server |  Updates  '------------'
13//   '-----------'   Data   |        |----.      .-------------.
14//                          '--------'    '----->| Node Status |
15//                     Spawns |    | Spawns      '-------------'
16//                      .-----'    '-----.
17//                      V                V
18//                .-----------.    .------------.
19//                | Consensus |    | Kubernetes |
20//                | & Curator |    |            |
21//                '-----------'    '------------'
22//
23// The internal state of the Role Server (eg. status of services, input from
24// Cluster Enrolment, current node roles as retrieved from the cluster) is
25// stored as in-memory Event Value variables, with some of them being exposed
26// externally for other services to consume (ie. ones that wish to depend on
27// some information managed by the Role Server but which do not need to be
28// spawned on demand by the Role Server). These Event Values and code which acts
29// upon them form a reactive/dataflow-driven model which drives the Role Server
30// logic forward.
31//
32// The Role Server also has to handle the complex bootstrap problem involved in
33// simultaneously accessing the control plane (for node roles and other cluster
34// data) while maintaining (possibly the only one in the cluster) control plane
35// instance. The state of resolution of this bootstrap problem is maintained
36// within ClusterMembership, which contains critical information about the
37// control plane, like the information required to connect to a Curator (local
38// or remote). It is updated both by external processes (ie. data from the
39// Cluster Enrolment) as well as logic responsible for spawning the control
40// plane.
41//
Serge Bazanski0d937772021-06-17 15:54:40 +020042package roleserve
43
44import (
45 "context"
Serge Bazanski6dff6d62022-01-28 18:15:14 +010046 "crypto/ed25519"
Serge Bazanski0d937772021-06-17 15:54:40 +020047
Serge Bazanskib43d0f02022-06-23 17:32:10 +020048 common "source.monogon.dev/metropolis/node"
Lorenz Brun1de8b182021-12-21 17:15:18 +010049 "source.monogon.dev/metropolis/node/core/identity"
Serge Bazanski0d937772021-06-17 15:54:40 +020050 "source.monogon.dev/metropolis/node/core/localstorage"
51 "source.monogon.dev/metropolis/node/core/network"
Serge Bazanskib43d0f02022-06-23 17:32:10 +020052 "source.monogon.dev/metropolis/node/core/rpc/resolver"
Serge Bazanski0d937772021-06-17 15:54:40 +020053 "source.monogon.dev/metropolis/pkg/supervisor"
Serge Bazanski6dff6d62022-01-28 18:15:14 +010054 cpb "source.monogon.dev/metropolis/proto/common"
Serge Bazanski0d937772021-06-17 15:54:40 +020055)
56
57// Config is the configuration of the role server.
58type Config struct {
Serge Bazanski0d937772021-06-17 15:54:40 +020059 // StorageRoot is a handle to access all of the Node's storage. This is needed
60 // as the roleserver spawns complex workloads like Kubernetes which need access
61 // to a broad range of storage.
62 StorageRoot *localstorage.Root
63
64 // Network is a handle to the network service, used by workloads.
65 Network *network.Service
Serge Bazanski0d937772021-06-17 15:54:40 +020066}
67
68// Service is the roleserver/“Role Server” service. See the package-level
69// documentation for more details.
70type Service struct {
71 Config
72
Serge Bazanski6dff6d62022-01-28 18:15:14 +010073 ClusterMembership ClusterMembershipValue
74 KubernetesStatus KubernetesStatusValue
75 bootstrapData bootstrapDataValue
76 localRoles localRolesValue
Serge Bazanski0d937772021-06-17 15:54:40 +020077
Serge Bazanski6dff6d62022-01-28 18:15:14 +010078 controlPlane *workerControlPlane
79 statusPush *workerStatusPush
Mateusz Zalega32b19292022-05-17 13:26:55 +020080 heartbeat *workerHeartbeat
Serge Bazanski6dff6d62022-01-28 18:15:14 +010081 kubernetes *workerKubernetes
82 rolefetch *workerRoleFetch
Serge Bazanskib43d0f02022-06-23 17:32:10 +020083
84 // resolver is the main, long-lived, authenticated cluster resolver that is used
85 // for all subsequent gRPC calls by the subordinates of the roleserver. It is
86 // created early in the roleserver lifecycle, and is seeded with node
87 // information as the first subordinate runs DialCurator().
88 resolver *resolver.Resolver
Serge Bazanski0d937772021-06-17 15:54:40 +020089}
90
91// New creates a Role Server services from a Config.
92func New(c Config) *Service {
Serge Bazanskib43d0f02022-06-23 17:32:10 +020093 // Run the resolver forever in the background, making sure to keep it as
94 // long-lived as possible.
95 rctx := context.Background()
Serge Bazanski6dff6d62022-01-28 18:15:14 +010096
Serge Bazanskib43d0f02022-06-23 17:32:10 +020097 s := &Service{
98 Config: c,
99 resolver: resolver.New(rctx),
100 }
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100101 s.controlPlane = &workerControlPlane{
102 storageRoot: s.StorageRoot,
103
104 bootstrapData: &s.bootstrapData,
105 clusterMembership: &s.ClusterMembership,
106 localRoles: &s.localRoles,
Serge Bazanskib43d0f02022-06-23 17:32:10 +0200107 resolver: s.resolver,
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100108 }
109
110 s.statusPush = &workerStatusPush{
111 network: s.Network,
112
113 clusterMembership: &s.ClusterMembership,
114 }
115
Mateusz Zalega32b19292022-05-17 13:26:55 +0200116 s.heartbeat = &workerHeartbeat{
117 network: s.Network,
118
119 clusterMembership: &s.ClusterMembership,
120 }
121
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100122 s.kubernetes = &workerKubernetes{
123 network: s.Network,
124 storageRoot: s.StorageRoot,
125
126 localRoles: &s.localRoles,
127 clusterMembership: &s.ClusterMembership,
128
129 kubernetesStatus: &s.KubernetesStatus,
130 }
131
132 s.rolefetch = &workerRoleFetch{
133 clusterMembership: &s.ClusterMembership,
134
135 localRoles: &s.localRoles,
136 }
137
138 return s
Serge Bazanski0d937772021-06-17 15:54:40 +0200139}
140
Mateusz Zalega2930e992022-04-25 12:52:35 +0200141func (s *Service) ProvideBootstrapData(privkey ed25519.PrivateKey, iok, cuk, nuk, jkey []byte) {
Serge Bazanskib43d0f02022-06-23 17:32:10 +0200142 pubkey := privkey.Public().(ed25519.PublicKey)
143 nid := identity.NodeID(pubkey)
144
145 // This is the first time we have the node ID, tell the resolver that it's
146 // available on the loopback interface.
147 s.resolver.AddOverride(nid, resolver.NodeByHostPort("127.0.0.1", uint16(common.CuratorServicePort)))
148
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100149 s.ClusterMembership.set(&ClusterMembership{
Serge Bazanskib43d0f02022-06-23 17:32:10 +0200150 pubkey: pubkey,
151 resolver: s.resolver,
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100152 })
153 s.bootstrapData.set(&bootstrapData{
Mateusz Zalega2930e992022-04-25 12:52:35 +0200154 nodePrivateKey: privkey,
155 initialOwnerKey: iok,
156 clusterUnlockKey: cuk,
157 nodeUnlockKey: nuk,
158 nodePrivateJoinKey: jkey,
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100159 })
Serge Bazanski0d937772021-06-17 15:54:40 +0200160}
161
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100162func (s *Service) ProvideRegisterData(credentials identity.NodeCredentials, directory *cpb.ClusterDirectory) {
Serge Bazanskib43d0f02022-06-23 17:32:10 +0200163 // This is the first time we have the node ID, tell the resolver that it's
164 // available on the loopback interface.
165 s.resolver.AddOverride(credentials.ID(), resolver.NodeByHostPort("127.0.0.1", uint16(common.CuratorServicePort)))
166
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100167 s.ClusterMembership.set(&ClusterMembership{
168 remoteCurators: directory,
169 credentials: &credentials,
170 pubkey: credentials.PublicKey(),
Serge Bazanskib43d0f02022-06-23 17:32:10 +0200171 resolver: s.resolver,
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100172 })
Serge Bazanski0d937772021-06-17 15:54:40 +0200173}
174
Mateusz Zalega2930e992022-04-25 12:52:35 +0200175func (s *Service) ProvideJoinData(credentials identity.NodeCredentials, directory *cpb.ClusterDirectory) {
Serge Bazanskib43d0f02022-06-23 17:32:10 +0200176 // This is the first time we have the node ID, tell the resolver that it's
177 // available on the loopback interface.
178 s.resolver.AddOverride(credentials.ID(), resolver.NodeByHostPort("127.0.0.1", uint16(common.CuratorServicePort)))
179
Mateusz Zalega2930e992022-04-25 12:52:35 +0200180 s.ClusterMembership.set(&ClusterMembership{
181 remoteCurators: directory,
182 credentials: &credentials,
183 pubkey: credentials.PublicKey(),
Serge Bazanskib43d0f02022-06-23 17:32:10 +0200184 resolver: s.resolver,
Mateusz Zalega2930e992022-04-25 12:52:35 +0200185 })
186}
187
Serge Bazanski0d937772021-06-17 15:54:40 +0200188// Run the Role Server service, which uses intermediary workload launchers to
189// start/stop subordinate services as the Node's roles change.
190func (s *Service) Run(ctx context.Context) error {
Serge Bazanskib43d0f02022-06-23 17:32:10 +0200191 s.resolver.SetLogger(func(f string, args ...interface{}) {
192 supervisor.Logger(ctx).WithAddedStackDepth(2).Infof(f, args...)
193 })
194
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100195 supervisor.Run(ctx, "controlplane", s.controlPlane.run)
196 supervisor.Run(ctx, "kubernetes", s.kubernetes.run)
197 supervisor.Run(ctx, "statuspush", s.statusPush.run)
Mateusz Zalega32b19292022-05-17 13:26:55 +0200198 supervisor.Run(ctx, "heartbeat", s.heartbeat.run)
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100199 supervisor.Run(ctx, "rolefetch", s.rolefetch.run)
Serge Bazanski0d937772021-06-17 15:54:40 +0200200 supervisor.Signal(ctx, supervisor.SignalHealthy)
201
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100202 <-ctx.Done()
203 return ctx.Err()
Serge Bazanski0d937772021-06-17 15:54:40 +0200204}