blob: de851540f80c8209cfee5128229801151a96904c [file] [log] [blame]
Tim Windelschmidt6d33a432025-02-04 14:34:25 +01001// Copyright The Monogon Project Authors.
Lorenz Brun6e8f69c2019-11-18 10:44:24 +01002// SPDX-License-Identifier: Apache-2.0
Lorenz Brun6e8f69c2019-11-18 10:44:24 +01003
4package kubernetes
5
6import (
Lorenz Brun878f5f92020-05-12 16:15:39 +02007 "context"
Serge Bazanskidbfc6382020-06-19 20:35:43 +02008 "fmt"
Lorenz Brun6e8f69c2019-11-18 10:44:24 +01009 "net"
Lorenz Brunb15abad2020-04-16 11:17:12 +020010
Lorenz Brun878f5f92020-05-12 16:15:39 +020011 "google.golang.org/grpc/codes"
12 "google.golang.org/grpc/status"
Lorenz Brunb15abad2020-04-16 11:17:12 +020013 "k8s.io/client-go/kubernetes"
Lorenz Brunf042e6f2020-06-24 16:46:09 +020014 "k8s.io/client-go/tools/clientcmd"
Lorenz Brun878f5f92020-05-12 16:15:39 +020015
Lorenz Brun4bde9312025-08-06 05:04:11 +020016 "source.monogon.dev/metropolis/node"
Jan Schärd20ddcc2024-05-08 14:18:29 +020017 "source.monogon.dev/metropolis/node/core/consensus"
Lorenz Brun1de8b182021-12-21 17:15:18 +010018 "source.monogon.dev/metropolis/node/core/identity"
Serge Bazanski31370b02021-01-07 16:31:14 +010019 "source.monogon.dev/metropolis/node/core/localstorage"
Serge Bazanskid8af5bf2021-03-16 13:38:29 +010020 "source.monogon.dev/metropolis/node/core/network"
Lorenz Bruncc078df2021-12-23 11:51:55 +010021 "source.monogon.dev/metropolis/node/kubernetes/authproxy"
Tim Windelschmidtf64f1972023-07-28 00:00:50 +000022 "source.monogon.dev/metropolis/node/kubernetes/metricsproxy"
Serge Bazanski31370b02021-01-07 16:31:14 +010023 "source.monogon.dev/metropolis/node/kubernetes/pki"
24 "source.monogon.dev/metropolis/node/kubernetes/reconciler"
Tim Windelschmidt9f21f532024-05-07 15:14:20 +020025 "source.monogon.dev/osbase/supervisor"
Serge Bazanski6d1ff362024-09-30 15:15:31 +000026
27 ipb "source.monogon.dev/metropolis/node/core/curator/proto/api"
28 apb "source.monogon.dev/metropolis/proto/api"
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010029)
30
Serge Bazanski6fdca3f2023-03-20 17:47:07 +010031type ConfigController struct {
Serge Bazanskid8af5bf2021-03-16 13:38:29 +010032 ServiceIPRange net.IPNet
33 ClusterNet net.IPNet
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020034
Serge Bazanskie99638e2024-09-30 17:06:44 +000035 KPKI *pki.PKI
36 Root *localstorage.Root
37 Consensus consensus.ServiceHandle
38 Network *network.Service
39 Node *identity.NodeCredentials
40 Curator ipb.CuratorClient
41 Management apb.ManagementClient
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010042}
43
Serge Bazanski6fdca3f2023-03-20 17:47:07 +010044type Controller struct {
45 c ConfigController
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020046}
Serge Bazanskidbfc6382020-06-19 20:35:43 +020047
Serge Bazanski6fdca3f2023-03-20 17:47:07 +010048func NewController(c ConfigController) *Controller {
49 s := &Controller{
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020050 c: c,
51 }
52 return s
53}
54
Serge Bazanski6fdca3f2023-03-20 17:47:07 +010055func (s *Controller) Run(ctx context.Context) error {
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020056 controllerManagerConfig, err := getPKIControllerManagerConfig(ctx, s.c.KPKI)
57 if err != nil {
58 return fmt.Errorf("could not generate controller manager pki config: %w", err)
59 }
60 controllerManagerConfig.clusterNet = s.c.ClusterNet
Lorenz Brun6211e4d2023-11-14 19:09:40 +010061 controllerManagerConfig.serviceNet = s.c.ServiceIPRange
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020062 schedulerConfig, err := getPKISchedulerConfig(ctx, s.c.KPKI)
63 if err != nil {
64 return fmt.Errorf("could not generate scheduler pki config: %w", err)
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010065 }
Serge Bazanskidbfc6382020-06-19 20:35:43 +020066
Serge Bazanskie88ffe92023-03-21 13:38:46 +010067 masterKubeconfig, err := s.c.KPKI.Kubeconfig(ctx, pki.Master, pki.KubernetesAPIEndpointForController)
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020068 if err != nil {
69 return fmt.Errorf("could not generate master kubeconfig: %w", err)
70 }
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010071
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020072 rawClientConfig, err := clientcmd.NewClientConfigFromBytes(masterKubeconfig)
73 if err != nil {
74 return fmt.Errorf("could not generate kubernetes client config: %w", err)
75 }
Lorenz Brun6e8f69c2019-11-18 10:44:24 +010076
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020077 clientConfig, err := rawClientConfig.ClientConfig()
Tim Windelschmidt096654a2024-04-18 23:10:19 +020078 if err != nil {
79 return fmt.Errorf("could not fetch generate client config: %w", err)
80 }
Serge Bazanskic2c7ad92020-07-13 17:20:09 +020081 clientSet, err := kubernetes.NewForConfig(clientConfig)
82 if err != nil {
83 return fmt.Errorf("could not generate kubernetes client: %w", err)
84 }
85
Jan Schärd20ddcc2024-05-08 14:18:29 +020086 supervisor.Logger(ctx).Infof("Waiting for consensus...")
87 w := s.c.Consensus.Watch()
88 defer w.Close()
89 st, err := w.Get(ctx, consensus.FilterRunning)
90 if err != nil {
91 return fmt.Errorf("while waiting for consensus: %w", err)
92 }
93 etcd, err := st.CuratorClient()
94 if err != nil {
95 return fmt.Errorf("while retrieving consensus client: %w", err)
96 }
97
Serge Bazanskid8af5bf2021-03-16 13:38:29 +010098 // Sub-runnable which starts all parts of Kubernetes that depend on the
99 // machine's external IP address. If it changes, the runnable will exit.
100 // TODO(q3k): test this
101 supervisor.Run(ctx, "networked", func(ctx context.Context) error {
Serge Bazanskib63ed8a2024-03-05 14:24:38 +0000102 networkWatch := s.c.Network.Status.Watch()
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100103 defer networkWatch.Close()
Lorenz Brun339582b2020-07-29 18:13:35 +0200104
Lorenz Brun4bde9312025-08-06 05:04:11 +0200105 var status *node.NetStatus
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200106
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100107 supervisor.Logger(ctx).Info("Waiting for node networking...")
108 for status == nil || status.ExternalAddress == nil {
109 status, err = networkWatch.Get(ctx)
110 if err != nil {
111 return fmt.Errorf("failed to get network status: %w", err)
112 }
113 }
114 address := status.ExternalAddress
115 supervisor.Logger(ctx).Info("Node has active networking, starting apiserver/kubelet")
116
117 apiserver := &apiserverService{
118 KPKI: s.c.KPKI,
119 AdvertiseAddress: address,
120 ServiceIPRange: s.c.ServiceIPRange,
121 EphemeralConsensusDirectory: &s.c.Root.Ephemeral.Consensus,
122 }
123
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100124 err := supervisor.RunGroup(ctx, map[string]supervisor.Runnable{
125 "apiserver": apiserver.Run,
Serge Bazanskid8af5bf2021-03-16 13:38:29 +0100126 })
127 if err != nil {
128 return fmt.Errorf("when starting apiserver/kubelet: %w", err)
129 }
130
131 supervisor.Signal(ctx, supervisor.SignalHealthy)
132
133 for status.ExternalAddress.Equal(address) {
134 status, err = networkWatch.Get(ctx)
135 if err != nil {
136 return fmt.Errorf("when watching for network changes: %w", err)
137 }
138 }
139 return fmt.Errorf("network configuration changed (%s -> %s)", address.String(), status.ExternalAddress.String())
140 })
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200141
Jan Schärd20ddcc2024-05-08 14:18:29 +0200142 reconcilerService := &reconciler.Service{
143 Etcd: etcd,
144 ClientSet: clientSet,
145 NodeID: s.c.Node.ID(),
146 }
147 err = supervisor.Run(ctx, "reconciler", reconcilerService.Run)
148 if err != nil {
149 return fmt.Errorf("could not run sub-service reconciler: %w", err)
150 }
151
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000152 lm := labelmaker{
153 clientSet: clientSet,
154 curator: s.c.Curator,
Serge Bazanskie99638e2024-09-30 17:06:44 +0000155 mgmt: s.c.Management,
Serge Bazanski6d1ff362024-09-30 15:15:31 +0000156 }
157 if err := supervisor.Run(ctx, "labelmaker", lm.run); err != nil {
158 return err
159 }
160
Serge Bazanski356cbf32023-03-16 17:52:20 +0100161 // Before we start anything else, make sure reconciliation passes at least once.
162 // This makes the initial startup of a cluster much cleaner as we don't end up
163 // starting the scheduler/controller-manager/etc just to get them to immediately
164 // fail and back off with 'unauthorized'.
Jan Schärd20ddcc2024-05-08 14:18:29 +0200165 supervisor.Logger(ctx).Info("Waiting for reconciler...")
Jan Schärb86917b2025-05-14 16:31:08 +0000166 err = reconcilerService.WaitReady(ctx)
Jan Schärd20ddcc2024-05-08 14:18:29 +0200167 if err != nil {
168 return fmt.Errorf("while waiting for reconciler: %w", err)
Serge Bazanski356cbf32023-03-16 17:52:20 +0100169 }
Jan Schärd20ddcc2024-05-08 14:18:29 +0200170 supervisor.Logger(ctx).Info("Reconciler is done.")
Serge Bazanski356cbf32023-03-16 17:52:20 +0100171
Lorenz Bruncc078df2021-12-23 11:51:55 +0100172 authProxy := authproxy.Service{
173 KPKI: s.c.KPKI,
174 Node: s.c.Node,
175 }
176
Tim Windelschmidtf64f1972023-07-28 00:00:50 +0000177 metricsProxy := metricsproxy.Service{
178 KPKI: s.c.KPKI,
179 }
180
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200181 for _, sub := range []struct {
182 name string
183 runnable supervisor.Runnable
184 }{
Serge Bazanski967be212020-11-02 11:26:59 +0100185 {"controller-manager", runControllerManager(*controllerManagerConfig)},
186 {"scheduler", runScheduler(*schedulerConfig)},
Lorenz Bruncc078df2021-12-23 11:51:55 +0100187 {"authproxy", authProxy.Run},
Tim Windelschmidtf64f1972023-07-28 00:00:50 +0000188 {"metricsproxy", metricsProxy.Run},
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200189 } {
190 err := supervisor.Run(ctx, sub.name, sub.runnable)
191 if err != nil {
192 return fmt.Errorf("could not run sub-service %q: %w", sub.name, err)
193 }
194 }
195
196 supervisor.Signal(ctx, supervisor.SignalHealthy)
Lorenz Brunfa5c2fc2020-09-28 13:32:12 +0200197 <-ctx.Done()
Serge Bazanskic2c7ad92020-07-13 17:20:09 +0200198 return nil
Lorenz Brun6e8f69c2019-11-18 10:44:24 +0100199}
200
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200201// GetDebugKubeconfig issues a kubeconfig for an arbitrary given identity.
202// Useful for debugging and testing.
Serge Bazanski6fdca3f2023-03-20 17:47:07 +0100203func (s *Controller) GetDebugKubeconfig(ctx context.Context, request *apb.GetDebugKubeconfigRequest) (*apb.GetDebugKubeconfigResponse, error) {
Serge Bazanski9411f7c2021-03-10 13:12:53 +0100204 client, err := s.c.KPKI.VolatileClient(ctx, request.Id, request.Groups)
205 if err != nil {
206 return nil, status.Errorf(codes.Unavailable, "Failed to get volatile client certificate: %v", err)
207 }
Serge Bazanskie88ffe92023-03-21 13:38:46 +0100208 kubeconfig, err := pki.Kubeconfig(ctx, s.c.KPKI.KV, client, pki.KubernetesAPIEndpointForController)
Lorenz Brun878f5f92020-05-12 16:15:39 +0200209 if err != nil {
210 return nil, status.Errorf(codes.Unavailable, "Failed to generate kubeconfig: %v", err)
211 }
Serge Bazanski9411f7c2021-03-10 13:12:53 +0100212 return &apb.GetDebugKubeconfigResponse{DebugKubeconfig: string(kubeconfig)}, nil
Lorenz Brun6e8f69c2019-11-18 10:44:24 +0100213}