Lorenz Brun | ae0d90d | 2019-09-05 17:53:56 +0200 | [diff] [blame] | 1 | // Copyright 2020 The Monogon Project Authors. |
| 2 | // |
| 3 | // SPDX-License-Identifier: Apache-2.0 |
| 4 | // |
| 5 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | // you may not use this file except in compliance with the License. |
| 7 | // You may obtain a copy of the License at |
| 8 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // |
| 11 | // Unless required by applicable law or agreed to in writing, software |
| 12 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | // See the License for the specific language governing permissions and |
| 15 | // limitations under the License. |
| 16 | |
| 17 | package main |
| 18 | |
| 19 | import ( |
Serge Bazanski | cdb8c78 | 2020-02-17 12:34:02 +0100 | [diff] [blame] | 20 | "context" |
Lorenz Brun | dd8c80e | 2019-10-07 16:19:49 +0200 | [diff] [blame] | 21 | "fmt" |
Serge Bazanski | 1ebd1e1 | 2020-07-13 19:17:16 +0200 | [diff] [blame^] | 22 | "log" |
| 23 | "net" |
Lorenz Brun | ae0d90d | 2019-09-05 17:53:56 +0200 | [diff] [blame] | 24 | "os" |
Lorenz Brun | ae0d90d | 2019-09-05 17:53:56 +0200 | [diff] [blame] | 25 | "os/signal" |
Lorenz Brun | f95909d | 2019-09-11 19:48:26 +0200 | [diff] [blame] | 26 | "runtime/debug" |
Lorenz Brun | ae0d90d | 2019-09-05 17:53:56 +0200 | [diff] [blame] | 27 | |
| 28 | "go.uber.org/zap" |
| 29 | "golang.org/x/sys/unix" |
Serge Bazanski | 1ebd1e1 | 2020-07-13 19:17:16 +0200 | [diff] [blame^] | 30 | "google.golang.org/grpc" |
| 31 | |
| 32 | "git.monogon.dev/source/nexantic.git/core/internal/cluster" |
| 33 | "git.monogon.dev/source/nexantic.git/core/internal/common" |
| 34 | "git.monogon.dev/source/nexantic.git/core/internal/common/supervisor" |
| 35 | "git.monogon.dev/source/nexantic.git/core/internal/containerd" |
| 36 | "git.monogon.dev/source/nexantic.git/core/internal/kubernetes" |
| 37 | "git.monogon.dev/source/nexantic.git/core/internal/kubernetes/pki" |
| 38 | "git.monogon.dev/source/nexantic.git/core/internal/localstorage" |
| 39 | "git.monogon.dev/source/nexantic.git/core/internal/localstorage/declarative" |
| 40 | "git.monogon.dev/source/nexantic.git/core/internal/network" |
| 41 | "git.monogon.dev/source/nexantic.git/core/pkg/tpm" |
| 42 | apb "git.monogon.dev/source/nexantic.git/core/proto/api" |
Lorenz Brun | ae0d90d | 2019-09-05 17:53:56 +0200 | [diff] [blame] | 43 | ) |
| 44 | |
var (
	// kubernetesConfig is the static/global part of the Kubernetes service configuration. In the future, this might
	// be configurable by loading it from the EnrolmentConfig. For now, it's static and the same across all clusters.
	kubernetesConfig = kubernetes.Config{
		// ServiceIPRange is the range from which Kubernetes Service ClusterIPs are allocated.
		ServiceIPRange: net.IPNet{ // TODO(q3k): Decide if configurable / final value
			IP:   net.IP{192, 168, 188, 0},
			Mask: net.IPMask{0xff, 0xff, 0xff, 0x00}, // /24, but Go stores as a literal mask
		},
		// ClusterNet is the pod network range; pod IPs for the whole cluster are carved out of it.
		ClusterNet: net.IPNet{
			IP:   net.IP{10, 0, 0, 0},
			Mask: net.IPMask{0xff, 0xff, 0x00, 0x00}, // /16
		},
	}
)
| 59 | |
Lorenz Brun | ae0d90d | 2019-09-05 17:53:56 +0200 | [diff] [blame] | 60 | func main() { |
Lorenz Brun | f95909d | 2019-09-11 19:48:26 +0200 | [diff] [blame] | 61 | defer func() { |
| 62 | if r := recover(); r != nil { |
| 63 | fmt.Println("Init panicked:", r) |
| 64 | debug.PrintStack() |
| 65 | } |
| 66 | unix.Sync() |
Leopold Schabel | 68c5875 | 2019-11-14 21:00:59 +0100 | [diff] [blame] | 67 | // TODO(lorenz): Switch this to Reboot when init panics are less likely |
| 68 | // Best effort, nothing we can do if this fails except printing the error to the console. |
| 69 | if err := unix.Reboot(unix.LINUX_REBOOT_CMD_POWER_OFF); err != nil { |
| 70 | panic(fmt.Sprintf("failed to halt node: %v\n", err)) |
| 71 | } |
Lorenz Brun | f95909d | 2019-09-11 19:48:26 +0200 | [diff] [blame] | 72 | }() |
Lorenz Brun | ae0d90d | 2019-09-05 17:53:56 +0200 | [diff] [blame] | 73 | logger, err := zap.NewDevelopment() |
| 74 | if err != nil { |
| 75 | panic(err) |
| 76 | } |
Serge Bazanski | 581b0bd | 2020-03-12 13:36:43 +0100 | [diff] [blame] | 77 | |
| 78 | // Remount onto a tmpfs and re-exec if needed. Otherwise, keep running. |
| 79 | err = switchRoot(logger) |
| 80 | if err != nil { |
| 81 | panic(fmt.Errorf("could not remount root: %w", err)) |
| 82 | } |
| 83 | |
Lorenz Brun | 878f5f9 | 2020-05-12 16:15:39 +0200 | [diff] [blame] | 84 | // Linux kernel default is 4096 which is far too low. Raise it to 1M which is what gVisor suggests. |
| 85 | if err := unix.Setrlimit(unix.RLIMIT_NOFILE, &unix.Rlimit{Cur: 1048576, Max: 1048576}); err != nil { |
| 86 | logger.Panic("Failed to raise rlimits", zap.Error(err)) |
| 87 | } |
| 88 | |
Lorenz Brun | ae0d90d | 2019-09-05 17:53:56 +0200 | [diff] [blame] | 89 | logger.Info("Starting Smalltown Init") |
| 90 | |
Lorenz Brun | ae0d90d | 2019-09-05 17:53:56 +0200 | [diff] [blame] | 91 | signalChannel := make(chan os.Signal, 2) |
| 92 | signal.Notify(signalChannel) |
| 93 | |
Lorenz Brun | ae0d90d | 2019-09-05 17:53:56 +0200 | [diff] [blame] | 94 | if err := tpm.Initialize(logger.With(zap.String("component", "tpm"))); err != nil { |
| 95 | logger.Panic("Failed to initialize TPM 2.0", zap.Error(err)) |
| 96 | } |
| 97 | |
Serge Bazanski | b1b742f | 2020-03-24 13:58:19 +0100 | [diff] [blame] | 98 | networkSvc := network.New(network.Config{}) |
Leopold Schabel | 68c5875 | 2019-11-14 21:00:59 +0100 | [diff] [blame] | 99 | |
Lorenz Brun | 70f65b2 | 2020-07-08 17:02:47 +0200 | [diff] [blame] | 100 | // This function initializes a headless Delve if this is a debug build or does nothing if it's not |
| 101 | initializeDebugger(networkSvc) |
| 102 | |
Serge Bazanski | 1ebd1e1 | 2020-07-13 19:17:16 +0200 | [diff] [blame^] | 103 | // Prepare local storage. |
| 104 | root := &localstorage.Root{} |
| 105 | if err := declarative.PlaceFS(root, "/"); err != nil { |
| 106 | panic(fmt.Errorf("when placing root FS: %w", err)) |
| 107 | } |
| 108 | |
| 109 | // trapdoor is a channel used to signal to the init service that a very low-level, unrecoverable failure |
| 110 | // occured. This causes a GURU MEDITATION ERROR visible to the end user. |
| 111 | trapdoor := make(chan struct{}) |
| 112 | |
| 113 | // Make context for supervisor. We cancel it when we reach the trapdoor. |
| 114 | ctxS, ctxC := context.WithCancel(context.Background()) |
| 115 | |
| 116 | // Start root initialization code as a supervisor one-shot runnable. This means waiting for the network, starting |
| 117 | // the cluster manager, and then starting all services related to the node's roles. |
| 118 | // TODO(q3k): move this to a separate 'init' service. |
| 119 | supervisor.New(ctxS, logger, func(ctx context.Context) error { |
| 120 | logger := supervisor.Logger(ctx) |
| 121 | |
| 122 | // Start storage and network - we need this to get anything else done. |
| 123 | if err := root.Start(ctx); err != nil { |
| 124 | return fmt.Errorf("cannot start root FS: %w", err) |
| 125 | } |
Serge Bazanski | b1b742f | 2020-03-24 13:58:19 +0100 | [diff] [blame] | 126 | if err := supervisor.Run(ctx, "network", networkSvc.Run); err != nil { |
Serge Bazanski | 1ebd1e1 | 2020-07-13 19:17:16 +0200 | [diff] [blame^] | 127 | return fmt.Errorf("when starting network: %w", err) |
Serge Bazanski | b1b742f | 2020-03-24 13:58:19 +0100 | [diff] [blame] | 128 | } |
Lorenz Brun | f95909d | 2019-09-11 19:48:26 +0200 | [diff] [blame] | 129 | |
Serge Bazanski | 1ebd1e1 | 2020-07-13 19:17:16 +0200 | [diff] [blame^] | 130 | // Wait for IP address from network. |
| 131 | ip, err := networkSvc.GetIP(ctx, true) |
Serge Bazanski | b1b742f | 2020-03-24 13:58:19 +0100 | [diff] [blame] | 132 | if err != nil { |
Serge Bazanski | 1ebd1e1 | 2020-07-13 19:17:16 +0200 | [diff] [blame^] | 133 | return fmt.Errorf("when waiting for IP address: %w", err) |
Serge Bazanski | b1b742f | 2020-03-24 13:58:19 +0100 | [diff] [blame] | 134 | } |
Lorenz Brun | ae0d90d | 2019-09-05 17:53:56 +0200 | [diff] [blame] | 135 | |
Serge Bazanski | 1ebd1e1 | 2020-07-13 19:17:16 +0200 | [diff] [blame^] | 136 | // Start cluster manager. This kicks off cluster membership machinery, which will either start |
| 137 | // a new cluster, enroll into one or join one. |
| 138 | m := cluster.NewManager(root, networkSvc) |
| 139 | if err := supervisor.Run(ctx, "enrolment", m.Run); err != nil { |
| 140 | return fmt.Errorf("when starting enrolment: %w", err) |
| 141 | } |
| 142 | |
| 143 | // Wait until the cluster manager settles. |
| 144 | success := m.WaitFinished() |
| 145 | if !success { |
| 146 | close(trapdoor) |
| 147 | return fmt.Errorf("enrolment failed, aborting") |
| 148 | } |
| 149 | |
| 150 | // We are now in a cluster. We can thus access our 'node' object and start all services that |
| 151 | // we should be running. |
| 152 | |
| 153 | node := m.Node() |
| 154 | if err := node.ConfigureLocalHostname(&root.Etc); err != nil { |
| 155 | close(trapdoor) |
| 156 | return fmt.Errorf("failed to set local hostname: %w", err) |
| 157 | } |
| 158 | |
| 159 | logger.Info("Enrolment success, continuing startup.") |
| 160 | logger.Info(fmt.Sprintf("This node (%s) has roles:", node.String())) |
| 161 | if cm := node.ConsensusMember(); cm != nil { |
| 162 | // There's no need to start anything for when we are a consensus member - the cluster |
| 163 | // manager does this for us if necessary (as creating/enrolling/joining a cluster is |
| 164 | // pretty tied into cluster lifecycle management). |
| 165 | logger.Info(fmt.Sprintf(" - etcd consensus member")) |
| 166 | } |
| 167 | if kw := node.KubernetesWorker(); kw != nil { |
| 168 | logger.Info(fmt.Sprintf(" - kubernetes worker")) |
| 169 | } |
| 170 | |
| 171 | // If we're supposed to be a kubernetes worker, start kubernetes services and containerd. |
| 172 | // In the future, this might be split further into kubernetes control plane and data plane |
| 173 | // roles. |
| 174 | var containerdSvc *containerd.Service |
| 175 | var kubeSvc *kubernetes.Service |
| 176 | if kw := node.KubernetesWorker(); kw != nil { |
| 177 | logger.Info("Starting Kubernetes worker services...") |
| 178 | |
| 179 | // Ensure Kubernetes PKI objects exist in etcd. |
| 180 | kpkiKV := m.ConsensusKV("cluster", "kpki") |
| 181 | kpki := pki.NewKubernetes(logger.Named("kpki"), kpkiKV) |
| 182 | if err := kpki.EnsureAll(ctx); err != nil { |
| 183 | return fmt.Errorf("failed to ensure kubernetes PKI present: %w", err) |
| 184 | } |
| 185 | |
| 186 | containerdSvc = &containerd.Service{ |
| 187 | EphemeralVolume: &root.Ephemeral.Containerd, |
| 188 | } |
| 189 | if err := supervisor.Run(ctx, "containerd", containerdSvc.Run); err != nil { |
| 190 | return fmt.Errorf("failed to start containerd service: %w", err) |
| 191 | } |
| 192 | |
| 193 | kubernetesConfig.KPKI = kpki |
| 194 | kubernetesConfig.Root = root |
| 195 | kubernetesConfig.AdvertiseAddress = *ip |
| 196 | kubeSvc = kubernetes.New(kubernetesConfig) |
| 197 | if err := supervisor.Run(ctx, "kubernetes", kubeSvc.Run); err != nil { |
| 198 | return fmt.Errorf("failed to start kubernetes service: %w", err) |
| 199 | } |
| 200 | |
| 201 | } |
| 202 | |
| 203 | // Start the node debug service. |
| 204 | // TODO(q3k): this needs to be done in a smarter way once LogTree lands, and then a few things can be |
| 205 | // refactored to start this earlier, or this can be split up into a multiple gRPC service on a single listener. |
| 206 | dbg := &debugService{ |
| 207 | cluster: m, |
| 208 | containerd: containerdSvc, |
| 209 | kubernetes: kubeSvc, |
| 210 | } |
| 211 | dbgSrv := grpc.NewServer() |
| 212 | apb.RegisterNodeDebugServiceServer(dbgSrv, dbg) |
| 213 | dbgLis, err := net.Listen("tcp", fmt.Sprintf(":%d", common.DebugServicePort)) |
Serge Bazanski | b1b742f | 2020-03-24 13:58:19 +0100 | [diff] [blame] | 214 | if err != nil { |
Serge Bazanski | 1ebd1e1 | 2020-07-13 19:17:16 +0200 | [diff] [blame^] | 215 | return fmt.Errorf("failed to listen on debug service: %w", err) |
| 216 | } |
| 217 | if err := supervisor.Run(ctx, "debug", supervisor.GRPCServer(dbgSrv, dbgLis, false)); err != nil { |
| 218 | return fmt.Errorf("failed to start debug service: %w", err) |
Serge Bazanski | b1b742f | 2020-03-24 13:58:19 +0100 | [diff] [blame] | 219 | } |
Lorenz Brun | ae0d90d | 2019-09-05 17:53:56 +0200 | [diff] [blame] | 220 | |
Serge Bazanski | b1b742f | 2020-03-24 13:58:19 +0100 | [diff] [blame] | 221 | supervisor.Signal(ctx, supervisor.SignalHealthy) |
Serge Bazanski | 1ebd1e1 | 2020-07-13 19:17:16 +0200 | [diff] [blame^] | 222 | supervisor.Signal(ctx, supervisor.SignalDone) |
| 223 | return nil |
| 224 | }) |
Serge Bazanski | b1b742f | 2020-03-24 13:58:19 +0100 | [diff] [blame] | 225 | |
Serge Bazanski | 1ebd1e1 | 2020-07-13 19:17:16 +0200 | [diff] [blame^] | 226 | // We're PID1, so orphaned processes get reparented to us to clean up |
| 227 | for { |
| 228 | select { |
| 229 | case <-trapdoor: |
| 230 | // If the trapdoor got closed, we got stuck early enough in the boot process that we can't do anything about |
| 231 | // it. Display a generic error message until we handle error conditions better. |
| 232 | ctxC() |
| 233 | log.Printf(" ########################") |
| 234 | log.Printf(" # GURU MEDIATION ERROR #") |
| 235 | log.Printf(" ########################") |
| 236 | log.Printf("") |
| 237 | log.Printf("Smalltown encountered an uncorrectable error and must be restarted.") |
| 238 | log.Printf("(Error condition: init trapdoor closed)") |
| 239 | log.Printf("") |
| 240 | select {} |
| 241 | |
| 242 | case sig := <-signalChannel: |
| 243 | switch sig { |
| 244 | case unix.SIGCHLD: |
| 245 | var status unix.WaitStatus |
| 246 | var rusage unix.Rusage |
| 247 | for { |
| 248 | res, err := unix.Wait4(-1, &status, unix.WNOHANG, &rusage) |
| 249 | if err != nil && err != unix.ECHILD { |
| 250 | logger.Error("Failed to wait on orphaned child", zap.Error(err)) |
| 251 | break |
Serge Bazanski | b1b742f | 2020-03-24 13:58:19 +0100 | [diff] [blame] | 252 | } |
Serge Bazanski | 1ebd1e1 | 2020-07-13 19:17:16 +0200 | [diff] [blame^] | 253 | if res <= 0 { |
| 254 | break |
| 255 | } |
Lorenz Brun | ae0d90d | 2019-09-05 17:53:56 +0200 | [diff] [blame] | 256 | } |
Serge Bazanski | 1ebd1e1 | 2020-07-13 19:17:16 +0200 | [diff] [blame^] | 257 | case unix.SIGURG: |
| 258 | // Go 1.14 introduced asynchronous preemption, which uses SIGURG. |
| 259 | // In order not to break backwards compatibility in the unlikely case |
| 260 | // of an application actually using SIGURG on its own, they're not filtering them. |
| 261 | // (https://github.com/golang/go/issues/37942) |
| 262 | logger.Debug("Ignoring SIGURG") |
| 263 | // TODO(lorenz): We can probably get more than just SIGCHLD as init, but I can't think |
| 264 | // of any others right now, just log them in case we hit any of them. |
| 265 | default: |
| 266 | logger.Warn("Got unexpected signal", zap.String("signal", sig.String())) |
Lorenz Brun | ae0d90d | 2019-09-05 17:53:56 +0200 | [diff] [blame] | 267 | } |
Lorenz Brun | ae0d90d | 2019-09-05 17:53:56 +0200 | [diff] [blame] | 268 | } |
Serge Bazanski | 1ebd1e1 | 2020-07-13 19:17:16 +0200 | [diff] [blame^] | 269 | } |
Lorenz Brun | ae0d90d | 2019-09-05 17:53:56 +0200 | [diff] [blame] | 270 | } |