blob: 02cce8c3e018199ac4b022fe5853f7e4cb2a61f3 [file] [log] [blame]
// Copyright 2020 The Monogon Project Authors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
16
17package kubernetes
18
19import (
Lorenz Brun878f5f92020-05-12 16:15:39 +020020 "context"
Serge Bazanskidbfc6382020-06-19 20:35:43 +020021 "fmt"
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010022 "net"
Lorenz Brunb15abad2020-04-16 11:17:12 +020023
Lorenz Brun878f5f92020-05-12 16:15:39 +020024 "google.golang.org/grpc/codes"
25 "google.golang.org/grpc/status"
Lorenz Brunb15abad2020-04-16 11:17:12 +020026 "k8s.io/client-go/kubernetes"
Lorenz Brunf042e6f2020-06-24 16:46:09 +020027 "k8s.io/client-go/tools/clientcmd"
Lorenz Brun878f5f92020-05-12 16:15:39 +020028
Jan Schärd20ddcc2024-05-08 14:18:29 +020029 "source.monogon.dev/metropolis/node/core/consensus"
Lorenz Brun1de8b182021-12-21 17:15:18 +010030 "source.monogon.dev/metropolis/node/core/identity"
Serge Bazanski31370b02021-01-07 16:31:14 +010031 "source.monogon.dev/metropolis/node/core/localstorage"
Serge Bazanskid8af5bf2021-03-16 13:38:29 +010032 "source.monogon.dev/metropolis/node/core/network"
Lorenz Bruncc078df2021-12-23 11:51:55 +010033 "source.monogon.dev/metropolis/node/kubernetes/authproxy"
Tim Windelschmidtf64f1972023-07-28 00:00:50 +000034 "source.monogon.dev/metropolis/node/kubernetes/metricsproxy"
Serge Bazanski31370b02021-01-07 16:31:14 +010035 "source.monogon.dev/metropolis/node/kubernetes/pki"
36 "source.monogon.dev/metropolis/node/kubernetes/reconciler"
Tim Windelschmidt9f21f532024-05-07 15:14:20 +020037 "source.monogon.dev/osbase/supervisor"
Serge Bazanski6d1ff362024-09-30 15:15:31 +000038
39 ipb "source.monogon.dev/metropolis/node/core/curator/proto/api"
40 apb "source.monogon.dev/metropolis/proto/api"
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010041)
42
Serge Bazanski6fdca3f2023-03-20 17:47:07 +010043type ConfigController struct {
Serge Bazanskid8af5bf2021-03-16 13:38:29 +010044 ServiceIPRange net.IPNet
45 ClusterNet net.IPNet
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020046
Serge Bazanskie99638e2024-09-30 17:06:44 +000047 KPKI *pki.PKI
48 Root *localstorage.Root
49 Consensus consensus.ServiceHandle
50 Network *network.Service
51 Node *identity.NodeCredentials
52 Curator ipb.CuratorClient
53 Management apb.ManagementClient
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010054}
55
Serge Bazanski6fdca3f2023-03-20 17:47:07 +010056type Controller struct {
57 c ConfigController
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020058}
Serge Bazanskidbfc6382020-06-19 20:35:43 +020059
Serge Bazanski6fdca3f2023-03-20 17:47:07 +010060func NewController(c ConfigController) *Controller {
61 s := &Controller{
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020062 c: c,
63 }
64 return s
65}
66
Serge Bazanski6fdca3f2023-03-20 17:47:07 +010067func (s *Controller) Run(ctx context.Context) error {
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020068 controllerManagerConfig, err := getPKIControllerManagerConfig(ctx, s.c.KPKI)
69 if err != nil {
70 return fmt.Errorf("could not generate controller manager pki config: %w", err)
71 }
72 controllerManagerConfig.clusterNet = s.c.ClusterNet
Lorenz Brun6211e4d2023-11-14 19:09:40 +010073 controllerManagerConfig.serviceNet = s.c.ServiceIPRange
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020074 schedulerConfig, err := getPKISchedulerConfig(ctx, s.c.KPKI)
75 if err != nil {
76 return fmt.Errorf("could not generate scheduler pki config: %w", err)
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010077 }
Serge Bazanskidbfc6382020-06-19 20:35:43 +020078
Serge Bazanskie88ffe92023-03-21 13:38:46 +010079 masterKubeconfig, err := s.c.KPKI.Kubeconfig(ctx, pki.Master, pki.KubernetesAPIEndpointForController)
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020080 if err != nil {
81 return fmt.Errorf("could not generate master kubeconfig: %w", err)
82 }
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010083
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020084 rawClientConfig, err := clientcmd.NewClientConfigFromBytes(masterKubeconfig)
85 if err != nil {
86 return fmt.Errorf("could not generate kubernetes client config: %w", err)
87 }
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010088
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020089 clientConfig, err := rawClientConfig.ClientConfig()
Tim Windelschmidt096654a2024-04-18 23:10:19 +020090 if err != nil {
91 return fmt.Errorf("could not fetch generate client config: %w", err)
92 }
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020093 clientSet, err := kubernetes.NewForConfig(clientConfig)
94 if err != nil {
95 return fmt.Errorf("could not generate kubernetes client: %w", err)
96 }
97
Jan Schärd20ddcc2024-05-08 14:18:29 +020098 supervisor.Logger(ctx).Infof("Waiting for consensus...")
99 w := s.c.Consensus.Watch()
100 defer w.Close()
101 st, err := w.Get(ctx, consensus.FilterRunning)
102 if err != nil {
103 return fmt.Errorf("while waiting for consensus: %w", err)
104 }
105 etcd, err := st.CuratorClient()
106 if err != nil {
107 return fmt.Errorf("while retrieving consensus client: %w", err)
108 }
109
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100110 // Sub-runnable which starts all parts of Kubernetes that depend on the
111 // machine's external IP address. If it changes, the runnable will exit.
112 // TODO(q3k): test this
113 supervisor.Run(ctx, "networked", func(ctx context.Context) error {
Serge Bazanskib63ed8a2024-03-05 14:24:38 +0000114 networkWatch := s.c.Network.Status.Watch()
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100115 defer networkWatch.Close()
Lorenz Brun339582b2020-07-29 18:13:35 +0200116
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100117 var status *network.Status
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200118
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100119 supervisor.Logger(ctx).Info("Waiting for node networking...")
120 for status == nil || status.ExternalAddress == nil {
121 status, err = networkWatch.Get(ctx)
122 if err != nil {
123 return fmt.Errorf("failed to get network status: %w", err)
124 }
125 }
126 address := status.ExternalAddress
127 supervisor.Logger(ctx).Info("Node has active networking, starting apiserver/kubelet")
128
129 apiserver := &apiserverService{
130 KPKI: s.c.KPKI,
131 AdvertiseAddress: address,
132 ServiceIPRange: s.c.ServiceIPRange,
133 EphemeralConsensusDirectory: &s.c.Root.Ephemeral.Consensus,
134 }
135
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100136 err := supervisor.RunGroup(ctx, map[string]supervisor.Runnable{
137 "apiserver": apiserver.Run,
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100138 })
139 if err != nil {
140 return fmt.Errorf("when starting apiserver/kubelet: %w", err)
141 }
142
143 supervisor.Signal(ctx, supervisor.SignalHealthy)
144
145 for status.ExternalAddress.Equal(address) {
146 status, err = networkWatch.Get(ctx)
147 if err != nil {
148 return fmt.Errorf("when watching for network changes: %w", err)
149 }
150 }
151 return fmt.Errorf("network configuration changed (%s -> %s)", address.String(), status.ExternalAddress.String())
152 })
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200153
Jan Schärd20ddcc2024-05-08 14:18:29 +0200154 reconcilerService := &reconciler.Service{
155 Etcd: etcd,
156 ClientSet: clientSet,
157 NodeID: s.c.Node.ID(),
158 }
159 err = supervisor.Run(ctx, "reconciler", reconcilerService.Run)
160 if err != nil {
161 return fmt.Errorf("could not run sub-service reconciler: %w", err)
162 }
163
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000164 lm := labelmaker{
165 clientSet: clientSet,
166 curator: s.c.Curator,
Serge Bazanskie99638e2024-09-30 17:06:44 +0000167 mgmt: s.c.Management,
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000168 }
169 if err := supervisor.Run(ctx, "labelmaker", lm.run); err != nil {
170 return err
171 }
172
Serge Bazanski356cbf32023-03-16 17:52:20 +0100173 // Before we start anything else, make sure reconciliation passes at least once.
174 // This makes the initial startup of a cluster much cleaner as we don't end up
175 // starting the scheduler/controller-manager/etc just to get them to immediately
176 // fail and back off with 'unauthorized'.
Jan Schärd20ddcc2024-05-08 14:18:29 +0200177 supervisor.Logger(ctx).Info("Waiting for reconciler...")
178 err = reconciler.WaitReady(ctx, etcd)
179 if err != nil {
180 return fmt.Errorf("while waiting for reconciler: %w", err)
Serge Bazanski356cbf32023-03-16 17:52:20 +0100181 }
Jan Schärd20ddcc2024-05-08 14:18:29 +0200182 supervisor.Logger(ctx).Info("Reconciler is done.")
Serge Bazanski356cbf32023-03-16 17:52:20 +0100183
Lorenz Bruncc078df2021-12-23 11:51:55 +0100184 authProxy := authproxy.Service{
185 KPKI: s.c.KPKI,
186 Node: s.c.Node,
187 }
188
Tim Windelschmidtf64f1972023-07-28 00:00:50 +0000189 metricsProxy := metricsproxy.Service{
190 KPKI: s.c.KPKI,
191 }
192
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200193 for _, sub := range []struct {
194 name string
195 runnable supervisor.Runnable
196 }{
Serge Bazanski967be212020-11-02 11:26:59 +0100197 {"controller-manager", runControllerManager(*controllerManagerConfig)},
198 {"scheduler", runScheduler(*schedulerConfig)},
Lorenz Bruncc078df2021-12-23 11:51:55 +0100199 {"authproxy", authProxy.Run},
Tim Windelschmidtf64f1972023-07-28 00:00:50 +0000200 {"metricsproxy", metricsProxy.Run},
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200201 } {
202 err := supervisor.Run(ctx, sub.name, sub.runnable)
203 if err != nil {
204 return fmt.Errorf("could not run sub-service %q: %w", sub.name, err)
205 }
206 }
207
208 supervisor.Signal(ctx, supervisor.SignalHealthy)
Lorenz Brunfa5c2fc2020-09-28 13:32:12 +0200209 <-ctx.Done()
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200210 return nil
Lorenz Brun6e8f69c2019-11-18 10:44:24 +0100211}
212
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200213// GetDebugKubeconfig issues a kubeconfig for an arbitrary given identity.
214// Useful for debugging and testing.
Serge Bazanski6fdca3f2023-03-20 17:47:07 +0100215func (s *Controller) GetDebugKubeconfig(ctx context.Context, request *apb.GetDebugKubeconfigRequest) (*apb.GetDebugKubeconfigResponse, error) {
Serge Bazanski9411f7c2021-03-10 13:12:53 +0100216 client, err := s.c.KPKI.VolatileClient(ctx, request.Id, request.Groups)
217 if err != nil {
218 return nil, status.Errorf(codes.Unavailable, "Failed to get volatile client certificate: %v", err)
219 }
Serge Bazanskie88ffe92023-03-21 13:38:46 +0100220 kubeconfig, err := pki.Kubeconfig(ctx, s.c.KPKI.KV, client, pki.KubernetesAPIEndpointForController)
Lorenz Brun878f5f92020-05-12 16:15:39 +0200221 if err != nil {
222 return nil, status.Errorf(codes.Unavailable, "Failed to generate kubeconfig: %v", err)
223 }
Serge Bazanski9411f7c2021-03-10 13:12:53 +0100224 return &apb.GetDebugKubeconfigResponse{DebugKubeconfig: string(kubeconfig)}, nil
Lorenz Brun6e8f69c2019-11-18 10:44:24 +0100225}