blob: 2c2e885e2b6a63822f96ff2bc5459ee006c8cad0 [file] [log] [blame]
// Package roleserve implements the roleserver/“Role Server”.
//
// The Role Server runs on every node and is responsible for running all of the
// node's role-dependent services, like the control plane (Consensus/etcd and
// Curator) and Kubernetes. It watches the node roles as assigned by the
// cluster's curator, updates the status of the node within the curator, and
// spawns on-demand services.
//
//
//    .-----------.          .--------.  Watches  .------------.
//    | Cluster   |--------->| Role   |<----------| Node Roles |
//    | Enrolment | Provides | Server |  Updates  '------------'
//    '-----------'   Data   |        |----.      .-------------.
//                           '--------'    '----->| Node Status |
//                      Spawns |    | Spawns      '-------------'
//                       .-----'    '-----.
//                       V                V
//                 .-----------.   .------------.
//                 | Consensus |   | Kubernetes |
//                 | & Curator |   |            |
//                 '-----------'   '------------'
//
// The internal state of the Role Server (e.g. status of services, input from
// Cluster Enrolment, current node roles as retrieved from the cluster) is
// stored as in-memory Event Value variables, with some of them being exposed
// externally for other services to consume (i.e. ones that wish to depend on
// some information managed by the Role Server but which do not need to be
// spawned on demand by the Role Server). These Event Values and code which acts
// upon them form a reactive/dataflow-driven model which drives the Role Server
// logic forward.
//
// The Role Server also has to handle the complex bootstrap problem involved in
// simultaneously accessing the control plane (for node roles and other cluster
// data) while maintaining (possibly the only one in the cluster) control plane
// instance. The state of resolution of this bootstrap problem is maintained
// within ClusterMembership, which contains critical information about the
// control plane, like the information required to connect to a Curator (local
// or remote). It is updated both by external processes (i.e. data from the
// Cluster Enrolment) as well as logic responsible for spawning the control
// plane.
//
Serge Bazanski0d937772021-06-17 15:54:40 +020042package roleserve
43
import (
	"context"
	"crypto/ed25519"
	"fmt"

	"source.monogon.dev/metropolis/node/core/identity"
	"source.monogon.dev/metropolis/node/core/localstorage"
	"source.monogon.dev/metropolis/node/core/network"
	"source.monogon.dev/metropolis/pkg/supervisor"
	cpb "source.monogon.dev/metropolis/proto/common"
)
54
55// Config is the configuration of the role server.
56type Config struct {
Serge Bazanski0d937772021-06-17 15:54:40 +020057 // StorageRoot is a handle to access all of the Node's storage. This is needed
58 // as the roleserver spawns complex workloads like Kubernetes which need access
59 // to a broad range of storage.
60 StorageRoot *localstorage.Root
61
62 // Network is a handle to the network service, used by workloads.
63 Network *network.Service
Serge Bazanski0d937772021-06-17 15:54:40 +020064}
65
66// Service is the roleserver/“Role Server” service. See the package-level
67// documentation for more details.
68type Service struct {
69 Config
70
Serge Bazanski6dff6d62022-01-28 18:15:14 +010071 ClusterMembership ClusterMembershipValue
72 KubernetesStatus KubernetesStatusValue
73 bootstrapData bootstrapDataValue
74 localRoles localRolesValue
Serge Bazanski0d937772021-06-17 15:54:40 +020075
Serge Bazanski6dff6d62022-01-28 18:15:14 +010076 controlPlane *workerControlPlane
77 statusPush *workerStatusPush
78 kubernetes *workerKubernetes
79 rolefetch *workerRoleFetch
Serge Bazanski0d937772021-06-17 15:54:40 +020080}
81
82// New creates a Role Server services from a Config.
83func New(c Config) *Service {
Serge Bazanski6dff6d62022-01-28 18:15:14 +010084 s := &Service{
Serge Bazanski0d937772021-06-17 15:54:40 +020085 Config: c,
Serge Bazanski0d937772021-06-17 15:54:40 +020086 }
Serge Bazanski6dff6d62022-01-28 18:15:14 +010087
88 s.controlPlane = &workerControlPlane{
89 storageRoot: s.StorageRoot,
90
91 bootstrapData: &s.bootstrapData,
92 clusterMembership: &s.ClusterMembership,
93 localRoles: &s.localRoles,
94 }
95
96 s.statusPush = &workerStatusPush{
97 network: s.Network,
98
99 clusterMembership: &s.ClusterMembership,
100 }
101
102 s.kubernetes = &workerKubernetes{
103 network: s.Network,
104 storageRoot: s.StorageRoot,
105
106 localRoles: &s.localRoles,
107 clusterMembership: &s.ClusterMembership,
108
109 kubernetesStatus: &s.KubernetesStatus,
110 }
111
112 s.rolefetch = &workerRoleFetch{
113 clusterMembership: &s.ClusterMembership,
114
115 localRoles: &s.localRoles,
116 }
117
118 return s
Serge Bazanski0d937772021-06-17 15:54:40 +0200119}
120
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100121func (s *Service) ProvideBootstrapData(privkey ed25519.PrivateKey, iok, cuk []byte) {
122 s.ClusterMembership.set(&ClusterMembership{
123 pubkey: privkey.Public().(ed25519.PublicKey),
124 })
125 s.bootstrapData.set(&bootstrapData{
126 nodePrivateKey: privkey,
127 initialOwnerKey: iok,
128 clusterUnlockKey: cuk,
129 })
Serge Bazanski0d937772021-06-17 15:54:40 +0200130}
131
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100132func (s *Service) ProvideRegisterData(credentials identity.NodeCredentials, directory *cpb.ClusterDirectory) {
133 s.ClusterMembership.set(&ClusterMembership{
134 remoteCurators: directory,
135 credentials: &credentials,
136 pubkey: credentials.PublicKey(),
137 })
Serge Bazanski0d937772021-06-17 15:54:40 +0200138}
139
140// Run the Role Server service, which uses intermediary workload launchers to
141// start/stop subordinate services as the Node's roles change.
142func (s *Service) Run(ctx context.Context) error {
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100143 supervisor.Run(ctx, "controlplane", s.controlPlane.run)
144 supervisor.Run(ctx, "kubernetes", s.kubernetes.run)
145 supervisor.Run(ctx, "statuspush", s.statusPush.run)
146 supervisor.Run(ctx, "rolefetch", s.rolefetch.run)
Serge Bazanski0d937772021-06-17 15:54:40 +0200147 supervisor.Signal(ctx, supervisor.SignalHealthy)
148
Serge Bazanski6dff6d62022-01-28 18:15:14 +0100149 <-ctx.Done()
150 return ctx.Err()
Serge Bazanski0d937772021-06-17 15:54:40 +0200151}