blob: 0dc7d5ebfea503d4e9f9c1736c2233dd713c7b3a [file] [log] [blame]
Lorenz Brunae0d90d2019-09-05 17:53:56 +02001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17package main
18
19import (
Serge Bazanskicdb8c782020-02-17 12:34:02 +010020 "context"
Lorenz Brundd8c80e2019-10-07 16:19:49 +020021 "fmt"
Serge Bazanski1ebd1e12020-07-13 19:17:16 +020022 "log"
23 "net"
Lorenz Brunae0d90d2019-09-05 17:53:56 +020024 "os"
Lorenz Brunae0d90d2019-09-05 17:53:56 +020025 "os/signal"
Lorenz Brunf95909d2019-09-11 19:48:26 +020026 "runtime/debug"
Lorenz Brunae0d90d2019-09-05 17:53:56 +020027
28 "go.uber.org/zap"
29 "golang.org/x/sys/unix"
Serge Bazanski1ebd1e12020-07-13 19:17:16 +020030 "google.golang.org/grpc"
31
32 "git.monogon.dev/source/nexantic.git/core/internal/cluster"
33 "git.monogon.dev/source/nexantic.git/core/internal/common"
34 "git.monogon.dev/source/nexantic.git/core/internal/common/supervisor"
35 "git.monogon.dev/source/nexantic.git/core/internal/containerd"
36 "git.monogon.dev/source/nexantic.git/core/internal/kubernetes"
37 "git.monogon.dev/source/nexantic.git/core/internal/kubernetes/pki"
38 "git.monogon.dev/source/nexantic.git/core/internal/localstorage"
39 "git.monogon.dev/source/nexantic.git/core/internal/localstorage/declarative"
40 "git.monogon.dev/source/nexantic.git/core/internal/network"
41 "git.monogon.dev/source/nexantic.git/core/pkg/tpm"
42 apb "git.monogon.dev/source/nexantic.git/core/proto/api"
Lorenz Brunae0d90d2019-09-05 17:53:56 +020043)
44
Serge Bazanski1ebd1e12020-07-13 19:17:16 +020045var (
46 // kubernetesConfig is the static/global part of the Kubernetes service configuration. In the future, this might
47 // be configurable by loading it from the EnrolmentConfig. Fow now, it's static and same across all clusters.
48 kubernetesConfig = kubernetes.Config{
49 ServiceIPRange: net.IPNet{ // TODO(q3k): Decide if configurable / final value
50 IP: net.IP{192, 168, 188, 0},
51 Mask: net.IPMask{0xff, 0xff, 0xff, 0x00}, // /24, but Go stores as a literal mask
52 },
53 ClusterNet: net.IPNet{
54 IP: net.IP{10, 0, 0, 0},
55 Mask: net.IPMask{0xff, 0xff, 0x00, 0x00}, // /16
56 },
57 }
Leopold Schabela4516f92019-12-04 20:27:05 +000058)
59
Lorenz Brunae0d90d2019-09-05 17:53:56 +020060func main() {
Lorenz Brunf95909d2019-09-11 19:48:26 +020061 defer func() {
62 if r := recover(); r != nil {
63 fmt.Println("Init panicked:", r)
64 debug.PrintStack()
65 }
66 unix.Sync()
Leopold Schabel68c58752019-11-14 21:00:59 +010067 // TODO(lorenz): Switch this to Reboot when init panics are less likely
68 // Best effort, nothing we can do if this fails except printing the error to the console.
69 if err := unix.Reboot(unix.LINUX_REBOOT_CMD_POWER_OFF); err != nil {
70 panic(fmt.Sprintf("failed to halt node: %v\n", err))
71 }
Lorenz Brunf95909d2019-09-11 19:48:26 +020072 }()
Lorenz Brunae0d90d2019-09-05 17:53:56 +020073 logger, err := zap.NewDevelopment()
74 if err != nil {
75 panic(err)
76 }
Serge Bazanski581b0bd2020-03-12 13:36:43 +010077
78 // Remount onto a tmpfs and re-exec if needed. Otherwise, keep running.
79 err = switchRoot(logger)
80 if err != nil {
81 panic(fmt.Errorf("could not remount root: %w", err))
82 }
83
Lorenz Brun878f5f92020-05-12 16:15:39 +020084 // Linux kernel default is 4096 which is far too low. Raise it to 1M which is what gVisor suggests.
85 if err := unix.Setrlimit(unix.RLIMIT_NOFILE, &unix.Rlimit{Cur: 1048576, Max: 1048576}); err != nil {
86 logger.Panic("Failed to raise rlimits", zap.Error(err))
87 }
88
Lorenz Brunae0d90d2019-09-05 17:53:56 +020089 logger.Info("Starting Smalltown Init")
90
Lorenz Brunae0d90d2019-09-05 17:53:56 +020091 signalChannel := make(chan os.Signal, 2)
92 signal.Notify(signalChannel)
93
Lorenz Brunae0d90d2019-09-05 17:53:56 +020094 if err := tpm.Initialize(logger.With(zap.String("component", "tpm"))); err != nil {
95 logger.Panic("Failed to initialize TPM 2.0", zap.Error(err))
96 }
97
Serge Bazanskib1b742f2020-03-24 13:58:19 +010098 networkSvc := network.New(network.Config{})
Leopold Schabel68c58752019-11-14 21:00:59 +010099
Lorenz Brun70f65b22020-07-08 17:02:47 +0200100 // This function initializes a headless Delve if this is a debug build or does nothing if it's not
101 initializeDebugger(networkSvc)
102
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200103 // Prepare local storage.
104 root := &localstorage.Root{}
105 if err := declarative.PlaceFS(root, "/"); err != nil {
106 panic(fmt.Errorf("when placing root FS: %w", err))
107 }
108
109 // trapdoor is a channel used to signal to the init service that a very low-level, unrecoverable failure
110 // occured. This causes a GURU MEDITATION ERROR visible to the end user.
111 trapdoor := make(chan struct{})
112
113 // Make context for supervisor. We cancel it when we reach the trapdoor.
114 ctxS, ctxC := context.WithCancel(context.Background())
115
116 // Start root initialization code as a supervisor one-shot runnable. This means waiting for the network, starting
117 // the cluster manager, and then starting all services related to the node's roles.
118 // TODO(q3k): move this to a separate 'init' service.
119 supervisor.New(ctxS, logger, func(ctx context.Context) error {
120 logger := supervisor.Logger(ctx)
121
122 // Start storage and network - we need this to get anything else done.
123 if err := root.Start(ctx); err != nil {
124 return fmt.Errorf("cannot start root FS: %w", err)
125 }
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100126 if err := supervisor.Run(ctx, "network", networkSvc.Run); err != nil {
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200127 return fmt.Errorf("when starting network: %w", err)
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100128 }
Lorenz Brunf95909d2019-09-11 19:48:26 +0200129
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200130 // Wait for IP address from network.
131 ip, err := networkSvc.GetIP(ctx, true)
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100132 if err != nil {
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200133 return fmt.Errorf("when waiting for IP address: %w", err)
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100134 }
Lorenz Brunae0d90d2019-09-05 17:53:56 +0200135
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200136 // Start cluster manager. This kicks off cluster membership machinery, which will either start
137 // a new cluster, enroll into one or join one.
138 m := cluster.NewManager(root, networkSvc)
139 if err := supervisor.Run(ctx, "enrolment", m.Run); err != nil {
140 return fmt.Errorf("when starting enrolment: %w", err)
141 }
142
143 // Wait until the cluster manager settles.
144 success := m.WaitFinished()
145 if !success {
146 close(trapdoor)
147 return fmt.Errorf("enrolment failed, aborting")
148 }
149
150 // We are now in a cluster. We can thus access our 'node' object and start all services that
151 // we should be running.
152
153 node := m.Node()
154 if err := node.ConfigureLocalHostname(&root.Etc); err != nil {
155 close(trapdoor)
156 return fmt.Errorf("failed to set local hostname: %w", err)
157 }
158
159 logger.Info("Enrolment success, continuing startup.")
160 logger.Info(fmt.Sprintf("This node (%s) has roles:", node.String()))
161 if cm := node.ConsensusMember(); cm != nil {
162 // There's no need to start anything for when we are a consensus member - the cluster
163 // manager does this for us if necessary (as creating/enrolling/joining a cluster is
164 // pretty tied into cluster lifecycle management).
165 logger.Info(fmt.Sprintf(" - etcd consensus member"))
166 }
167 if kw := node.KubernetesWorker(); kw != nil {
168 logger.Info(fmt.Sprintf(" - kubernetes worker"))
169 }
170
171 // If we're supposed to be a kubernetes worker, start kubernetes services and containerd.
172 // In the future, this might be split further into kubernetes control plane and data plane
173 // roles.
174 var containerdSvc *containerd.Service
175 var kubeSvc *kubernetes.Service
176 if kw := node.KubernetesWorker(); kw != nil {
177 logger.Info("Starting Kubernetes worker services...")
178
179 // Ensure Kubernetes PKI objects exist in etcd.
180 kpkiKV := m.ConsensusKV("cluster", "kpki")
181 kpki := pki.NewKubernetes(logger.Named("kpki"), kpkiKV)
182 if err := kpki.EnsureAll(ctx); err != nil {
183 return fmt.Errorf("failed to ensure kubernetes PKI present: %w", err)
184 }
185
186 containerdSvc = &containerd.Service{
187 EphemeralVolume: &root.Ephemeral.Containerd,
188 }
189 if err := supervisor.Run(ctx, "containerd", containerdSvc.Run); err != nil {
190 return fmt.Errorf("failed to start containerd service: %w", err)
191 }
192
193 kubernetesConfig.KPKI = kpki
194 kubernetesConfig.Root = root
195 kubernetesConfig.AdvertiseAddress = *ip
196 kubeSvc = kubernetes.New(kubernetesConfig)
197 if err := supervisor.Run(ctx, "kubernetes", kubeSvc.Run); err != nil {
198 return fmt.Errorf("failed to start kubernetes service: %w", err)
199 }
200
201 }
202
203 // Start the node debug service.
204 // TODO(q3k): this needs to be done in a smarter way once LogTree lands, and then a few things can be
205 // refactored to start this earlier, or this can be split up into a multiple gRPC service on a single listener.
206 dbg := &debugService{
207 cluster: m,
208 containerd: containerdSvc,
209 kubernetes: kubeSvc,
210 }
211 dbgSrv := grpc.NewServer()
212 apb.RegisterNodeDebugServiceServer(dbgSrv, dbg)
213 dbgLis, err := net.Listen("tcp", fmt.Sprintf(":%d", common.DebugServicePort))
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100214 if err != nil {
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200215 return fmt.Errorf("failed to listen on debug service: %w", err)
216 }
217 if err := supervisor.Run(ctx, "debug", supervisor.GRPCServer(dbgSrv, dbgLis, false)); err != nil {
218 return fmt.Errorf("failed to start debug service: %w", err)
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100219 }
Lorenz Brunae0d90d2019-09-05 17:53:56 +0200220
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100221 supervisor.Signal(ctx, supervisor.SignalHealthy)
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200222 supervisor.Signal(ctx, supervisor.SignalDone)
223 return nil
224 })
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100225
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200226 // We're PID1, so orphaned processes get reparented to us to clean up
227 for {
228 select {
229 case <-trapdoor:
230 // If the trapdoor got closed, we got stuck early enough in the boot process that we can't do anything about
231 // it. Display a generic error message until we handle error conditions better.
232 ctxC()
233 log.Printf(" ########################")
234 log.Printf(" # GURU MEDIATION ERROR #")
235 log.Printf(" ########################")
236 log.Printf("")
237 log.Printf("Smalltown encountered an uncorrectable error and must be restarted.")
238 log.Printf("(Error condition: init trapdoor closed)")
239 log.Printf("")
240 select {}
241
242 case sig := <-signalChannel:
243 switch sig {
244 case unix.SIGCHLD:
245 var status unix.WaitStatus
246 var rusage unix.Rusage
247 for {
248 res, err := unix.Wait4(-1, &status, unix.WNOHANG, &rusage)
249 if err != nil && err != unix.ECHILD {
250 logger.Error("Failed to wait on orphaned child", zap.Error(err))
251 break
Serge Bazanskib1b742f2020-03-24 13:58:19 +0100252 }
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200253 if res <= 0 {
254 break
255 }
Lorenz Brunae0d90d2019-09-05 17:53:56 +0200256 }
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200257 case unix.SIGURG:
258 // Go 1.14 introduced asynchronous preemption, which uses SIGURG.
259 // In order not to break backwards compatibility in the unlikely case
260 // of an application actually using SIGURG on its own, they're not filtering them.
261 // (https://github.com/golang/go/issues/37942)
262 logger.Debug("Ignoring SIGURG")
263 // TODO(lorenz): We can probably get more than just SIGCHLD as init, but I can't think
264 // of any others right now, just log them in case we hit any of them.
265 default:
266 logger.Warn("Got unexpected signal", zap.String("signal", sig.String()))
Lorenz Brunae0d90d2019-09-05 17:53:56 +0200267 }
Lorenz Brunae0d90d2019-09-05 17:53:56 +0200268 }
Serge Bazanski1ebd1e12020-07-13 19:17:16 +0200269 }
Lorenz Brunae0d90d2019-09-05 17:53:56 +0200270}