blob: c1cec27590f9d595cf5654f1ee0a208fc878296c [file] [log] [blame]
// Copyright 2020 The Monogon Project Authors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
16
17package kubernetes
18
19import (
Lorenz Brun878f5f92020-05-12 16:15:39 +020020 "context"
Serge Bazanskidbfc6382020-06-19 20:35:43 +020021 "fmt"
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010022 "net"
Lorenz Brunb15abad2020-04-16 11:17:12 +020023 "time"
24
Lorenz Brun878f5f92020-05-12 16:15:39 +020025 "google.golang.org/grpc/codes"
26 "google.golang.org/grpc/status"
Lorenz Brunb15abad2020-04-16 11:17:12 +020027 "k8s.io/client-go/kubernetes"
Lorenz Brunf042e6f2020-06-24 16:46:09 +020028 "k8s.io/client-go/tools/clientcmd"
Lorenz Brun878f5f92020-05-12 16:15:39 +020029
Lorenz Brun1de8b182021-12-21 17:15:18 +010030 "source.monogon.dev/metropolis/node/core/identity"
Serge Bazanski31370b02021-01-07 16:31:14 +010031 "source.monogon.dev/metropolis/node/core/localstorage"
Serge Bazanskid8af5bf2021-03-16 13:38:29 +010032 "source.monogon.dev/metropolis/node/core/network"
Serge Bazanski31370b02021-01-07 16:31:14 +010033 "source.monogon.dev/metropolis/node/core/network/dns"
Lorenz Bruncc078df2021-12-23 11:51:55 +010034 "source.monogon.dev/metropolis/node/kubernetes/authproxy"
Tim Windelschmidtf64f1972023-07-28 00:00:50 +000035 "source.monogon.dev/metropolis/node/kubernetes/metricsproxy"
Serge Bazanski31370b02021-01-07 16:31:14 +010036 "source.monogon.dev/metropolis/node/kubernetes/pki"
37 "source.monogon.dev/metropolis/node/kubernetes/reconciler"
38 "source.monogon.dev/metropolis/pkg/supervisor"
Serge Bazanski6fdca3f2023-03-20 17:47:07 +010039
Serge Bazanski31370b02021-01-07 16:31:14 +010040 apb "source.monogon.dev/metropolis/proto/api"
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010041)
42
Serge Bazanski6fdca3f2023-03-20 17:47:07 +010043type ConfigController struct {
Serge Bazanskid8af5bf2021-03-16 13:38:29 +010044 ServiceIPRange net.IPNet
45 ClusterNet net.IPNet
Lorenz Brun78cefca2022-06-20 12:59:55 +000046 ClusterDomain string
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020047
Serge Bazanski2cfafc92023-03-21 16:42:47 +010048 KPKI *pki.PKI
49 Root *localstorage.Root
50 Network *network.Service
Serge Bazanskiad86a552024-01-31 17:46:47 +010051 Node *identity.NodeCredentials
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010052}
53
Serge Bazanski6fdca3f2023-03-20 17:47:07 +010054type Controller struct {
55 c ConfigController
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020056}
Serge Bazanskidbfc6382020-06-19 20:35:43 +020057
Serge Bazanski6fdca3f2023-03-20 17:47:07 +010058func NewController(c ConfigController) *Controller {
59 s := &Controller{
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020060 c: c,
61 }
62 return s
63}
64
Serge Bazanski6fdca3f2023-03-20 17:47:07 +010065func (s *Controller) Run(ctx context.Context) error {
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020066 controllerManagerConfig, err := getPKIControllerManagerConfig(ctx, s.c.KPKI)
67 if err != nil {
68 return fmt.Errorf("could not generate controller manager pki config: %w", err)
69 }
70 controllerManagerConfig.clusterNet = s.c.ClusterNet
Lorenz Brun6211e4d2023-11-14 19:09:40 +010071 controllerManagerConfig.serviceNet = s.c.ServiceIPRange
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020072 schedulerConfig, err := getPKISchedulerConfig(ctx, s.c.KPKI)
73 if err != nil {
74 return fmt.Errorf("could not generate scheduler pki config: %w", err)
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010075 }
Serge Bazanskidbfc6382020-06-19 20:35:43 +020076
Serge Bazanskie88ffe92023-03-21 13:38:46 +010077 masterKubeconfig, err := s.c.KPKI.Kubeconfig(ctx, pki.Master, pki.KubernetesAPIEndpointForController)
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020078 if err != nil {
79 return fmt.Errorf("could not generate master kubeconfig: %w", err)
80 }
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010081
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020082 rawClientConfig, err := clientcmd.NewClientConfigFromBytes(masterKubeconfig)
83 if err != nil {
84 return fmt.Errorf("could not generate kubernetes client config: %w", err)
85 }
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010086
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020087 clientConfig, err := rawClientConfig.ClientConfig()
Tim Windelschmidt096654a2024-04-18 23:10:19 +020088 if err != nil {
89 return fmt.Errorf("could not fetch generate client config: %w", err)
90 }
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020091 clientSet, err := kubernetes.NewForConfig(clientConfig)
92 if err != nil {
93 return fmt.Errorf("could not generate kubernetes client: %w", err)
94 }
95
Serge Bazanskid8af5bf2021-03-16 13:38:29 +010096 // Sub-runnable which starts all parts of Kubernetes that depend on the
97 // machine's external IP address. If it changes, the runnable will exit.
98 // TODO(q3k): test this
99 supervisor.Run(ctx, "networked", func(ctx context.Context) error {
Serge Bazanskib63ed8a2024-03-05 14:24:38 +0000100 networkWatch := s.c.Network.Status.Watch()
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100101 defer networkWatch.Close()
Lorenz Brun339582b2020-07-29 18:13:35 +0200102
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100103 var status *network.Status
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200104
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100105 supervisor.Logger(ctx).Info("Waiting for node networking...")
106 for status == nil || status.ExternalAddress == nil {
107 status, err = networkWatch.Get(ctx)
108 if err != nil {
109 return fmt.Errorf("failed to get network status: %w", err)
110 }
111 }
112 address := status.ExternalAddress
113 supervisor.Logger(ctx).Info("Node has active networking, starting apiserver/kubelet")
114
115 apiserver := &apiserverService{
116 KPKI: s.c.KPKI,
117 AdvertiseAddress: address,
118 ServiceIPRange: s.c.ServiceIPRange,
119 EphemeralConsensusDirectory: &s.c.Root.Ephemeral.Consensus,
120 }
121
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100122 err := supervisor.RunGroup(ctx, map[string]supervisor.Runnable{
123 "apiserver": apiserver.Run,
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100124 })
125 if err != nil {
126 return fmt.Errorf("when starting apiserver/kubelet: %w", err)
127 }
128
129 supervisor.Signal(ctx, supervisor.SignalHealthy)
130
131 for status.ExternalAddress.Equal(address) {
132 status, err = networkWatch.Get(ctx)
133 if err != nil {
134 return fmt.Errorf("when watching for network changes: %w", err)
135 }
136 }
137 return fmt.Errorf("network configuration changed (%s -> %s)", address.String(), status.ExternalAddress.String())
138 })
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200139
Serge Bazanski356cbf32023-03-16 17:52:20 +0100140 // Before we start anything else, make sure reconciliation passes at least once.
141 // This makes the initial startup of a cluster much cleaner as we don't end up
142 // starting the scheduler/controller-manager/etc just to get them to immediately
143 // fail and back off with 'unauthorized'.
144 startLogging := time.Now().Add(2 * time.Second)
145 supervisor.Logger(ctx).Infof("Performing initial resource reconciliation...")
146 for {
147 err := reconciler.ReconcileAll(ctx, clientSet)
148 if err == nil {
149 supervisor.Logger(ctx).Infof("Initial resource reconciliation succeeded.")
Serge Bazanski356cbf32023-03-16 17:52:20 +0100150 break
151 }
152 if time.Now().After(startLogging) {
153 supervisor.Logger(ctx).Errorf("Still couldn't do initial reconciliation: %v", err)
Serge Bazanskid85a40a2023-03-22 11:14:08 +0100154 startLogging = time.Now().Add(10 * time.Second)
Serge Bazanski356cbf32023-03-16 17:52:20 +0100155 }
156 time.Sleep(100 * time.Millisecond)
157 }
158
Lorenz Bruncc078df2021-12-23 11:51:55 +0100159 authProxy := authproxy.Service{
160 KPKI: s.c.KPKI,
161 Node: s.c.Node,
162 }
163
Tim Windelschmidtf64f1972023-07-28 00:00:50 +0000164 metricsProxy := metricsproxy.Service{
165 KPKI: s.c.KPKI,
166 }
167
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200168 for _, sub := range []struct {
169 name string
170 runnable supervisor.Runnable
171 }{
Serge Bazanski967be212020-11-02 11:26:59 +0100172 {"controller-manager", runControllerManager(*controllerManagerConfig)},
173 {"scheduler", runScheduler(*schedulerConfig)},
Serge Bazanski356cbf32023-03-16 17:52:20 +0100174 {"reconciler", reconciler.Maintain(clientSet)},
Lorenz Bruncc078df2021-12-23 11:51:55 +0100175 {"authproxy", authProxy.Run},
Tim Windelschmidtf64f1972023-07-28 00:00:50 +0000176 {"metricsproxy", metricsProxy.Run},
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200177 } {
178 err := supervisor.Run(ctx, sub.name, sub.runnable)
179 if err != nil {
180 return fmt.Errorf("could not run sub-service %q: %w", sub.name, err)
181 }
182 }
183
Lorenz Brunfa5c2fc2020-09-28 13:32:12 +0200184 supervisor.Logger(ctx).Info("Registering K8s CoreDNS")
Lorenz Brun78cefca2022-06-20 12:59:55 +0000185 clusterDNSDirective := dns.NewKubernetesDirective(s.c.ClusterDomain, masterKubeconfig)
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100186 s.c.Network.ConfigureDNS(clusterDNSDirective)
Lorenz Brunfa5c2fc2020-09-28 13:32:12 +0200187
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200188 supervisor.Signal(ctx, supervisor.SignalHealthy)
Lorenz Brunfa5c2fc2020-09-28 13:32:12 +0200189 <-ctx.Done()
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100190 s.c.Network.ConfigureDNS(dns.CancelDirective(clusterDNSDirective))
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200191 return nil
Lorenz Brun6e8f69c2019-11-18 10:44:24 +0100192}
193
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200194// GetDebugKubeconfig issues a kubeconfig for an arbitrary given identity.
195// Useful for debugging and testing.
Serge Bazanski6fdca3f2023-03-20 17:47:07 +0100196func (s *Controller) GetDebugKubeconfig(ctx context.Context, request *apb.GetDebugKubeconfigRequest) (*apb.GetDebugKubeconfigResponse, error) {
Serge Bazanski9411f7c2021-03-10 13:12:53 +0100197 client, err := s.c.KPKI.VolatileClient(ctx, request.Id, request.Groups)
198 if err != nil {
199 return nil, status.Errorf(codes.Unavailable, "Failed to get volatile client certificate: %v", err)
200 }
Serge Bazanskie88ffe92023-03-21 13:38:46 +0100201 kubeconfig, err := pki.Kubeconfig(ctx, s.c.KPKI.KV, client, pki.KubernetesAPIEndpointForController)
Lorenz Brun878f5f92020-05-12 16:15:39 +0200202 if err != nil {
203 return nil, status.Errorf(codes.Unavailable, "Failed to generate kubeconfig: %v", err)
204 }
Serge Bazanski9411f7c2021-03-10 13:12:53 +0100205 return &apb.GetDebugKubeconfigResponse{DebugKubeconfig: string(kubeconfig)}, nil
Lorenz Brun6e8f69c2019-11-18 10:44:24 +0100206}