blob: 6373b648e8f464b4fb6ff98ce851e01a110b04fe [file] [log] [blame]
// Copyright The Monogon Project Authors.
// SPDX-License-Identifier: Apache-2.0

// Package bringup implements a simple wrapper which configures all default
// mounts, logging and the corresponding forwarders to tty0, ttyS0 and any
// console named on the kernel command line. It then configures a new logtree
// and starts a supervisor to run the provided supervisor.Runnable. Said
// Runnable is expected to return no error. If it does return an error, the
// error will be printed and the system will reboot a few seconds later.
10package bringup
11
12import (
Tim Windelschmidt83da4222024-12-16 02:49:50 +010013 "bufio"
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020014 "context"
Tim Windelschmidt83da4222024-12-16 02:49:50 +010015 "errors"
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020016 "fmt"
17 "os"
Tim Windelschmidt83da4222024-12-16 02:49:50 +010018 "runtime/debug"
19 "strings"
20 "sync/atomic"
21 "syscall"
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020022 "time"
23
Tim Windelschmidt83da4222024-12-16 02:49:50 +010024 "github.com/opencontainers/runc/libcontainer/cgroups"
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020025 "go.uber.org/multierr"
26 "golang.org/x/sys/unix"
27
Tim Windelschmidt83da4222024-12-16 02:49:50 +010028 "source.monogon.dev/go/logging"
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020029 "source.monogon.dev/osbase/bootparam"
30 "source.monogon.dev/osbase/efivarfs"
31 "source.monogon.dev/osbase/logtree"
32 "source.monogon.dev/osbase/supervisor"
33)
34
Tim Windelschmidt83da4222024-12-16 02:49:50 +010035type Config struct {
36 Console ConsoleConfig
37 Supervisor SupervisorConfig
38}
39
40type ConsoleConfig struct {
41 ShortenDictionary logtree.ShortenDictionary
42
43 // Filter is used to filter out some uselessly verbose logs from the
44 // console. It should return true if an entry is allowed to be printed.
45 Filter func(*logtree.LogEntry) bool
46}
47
48type SupervisorConfig struct {
49 Metrics []supervisor.Metrics
50}
51
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020052type Runnable supervisor.Runnable
53
54func (r Runnable) Run() {
Tim Windelschmidt83da4222024-12-16 02:49:50 +010055 r.RunWith(Config{})
56}
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020057
Tim Windelschmidt83da4222024-12-16 02:49:50 +010058func (r Runnable) RunWith(cfg Config) {
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020059 if err := setupMounts(); err != nil {
60 // We cannot do anything if we fail to mount.
61 panic(err)
62 }
63
Tim Windelschmidt83da4222024-12-16 02:49:50 +010064 // Root system logtree.
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020065 lt := logtree.New()
Tim Windelschmidt83da4222024-12-16 02:49:50 +010066
67 // Collect serial consoles from cmdline and defaults.
68 serialConsoles := collectConsoles()
69
70 // Setup console writers
71 if err := setupConsoles(lt, serialConsoles, cfg.Console); err != nil {
72 panic(err)
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020073 }
74
Tim Windelschmidt7dac92b2024-12-16 02:51:04 +010075 // Initialize persistent panic handler
76 initPanicHandler(lt, serialConsoles)
77
Tim Windelschmidt83da4222024-12-16 02:49:50 +010078 // Rewire os.Stdout and os.Stderr to logtree which then is printed
79 // to serial consoles.
80 if err := rewireStdIO(lt); err != nil {
81 panic(err)
82 }
83
Tim Windelschmidt83da4222024-12-16 02:49:50 +010084 // Initial logger. Used until we get to a supervisor.
85 logger := lt.MustLeveledFor("init")
86
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020087 sCtx, cancel := context.WithCancelCause(context.Background())
88
Tim Windelschmidt83da4222024-12-16 02:49:50 +010089 supervisorOptions := []supervisor.SupervisorOpt{
90 supervisor.WithExistingLogtree(lt),
91 }
92
93 for _, m := range cfg.Supervisor.Metrics {
94 supervisorOptions = append(supervisorOptions, supervisor.WithMetrics(m))
95 }
96
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020097 // Don't reschedule the root runnable...
Tim Windelschmidt83da4222024-12-16 02:49:50 +010098 var started atomic.Bool
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020099 supervisor.New(sCtx, func(ctx context.Context) (err error) {
100 defer func() {
101 if r := recover(); r != nil {
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100102 err = fmt.Errorf("root runnable paniced: \n%s:\n%s", r, debug.Stack())
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200103 cancel(err)
104 }
105 }()
106
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100107 if started.Swap(true) {
108 err = fmt.Errorf("root runnable restarted")
109 cancel(err)
110 return
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200111 }
112
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100113 if err := supervisor.Run(ctx, "pstore", dumpAndCleanPstore); err != nil {
114 return fmt.Errorf("when starting pstore: %w", err)
115 }
116
117 err = r(ctx)
118 if err != nil {
119 cancel(err)
120 return
121 }
122
123 return
124 }, supervisorOptions...)
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200125
126 <-sCtx.Done()
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100127
128 time.Sleep(time.Second)
129
130 // Write final messages on panic to stderr.
131 logger.Errorf("Fatal error: %+v", context.Cause(sCtx))
132 logger.Error("This node could not be started. Rebooting...")
133 time.Sleep(time.Second)
134
135 // After a bit, kill all console log readers.
136 for _, c := range serialConsoles {
137 if c.reader == nil {
138 continue
139 }
140 c.reader.Close()
141 c.reader.Stream = nil
142 }
143
144 // Wait for final logs to flush to console...
145 time.Sleep(5 * time.Second)
146 unix.Sync()
147 unix.Reboot(unix.LINUX_REBOOT_CMD_RESTART)
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200148}
149
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100150func rewireStdIO(lt *logtree.LogTree) error {
151 if err := rewireFD(lt, "stderr", os.Stderr, logging.Leveled.Error); err != nil {
152 return fmt.Errorf("failed rewiring stderr: %w", err)
153 }
154 if err := rewireFD(lt, "stdout", os.Stdout, logging.Leveled.Info); err != nil {
155 return fmt.Errorf("failed rewiring stdout: %w", err)
156 }
157 return nil
158}
159
160func rewireFD(lt *logtree.LogTree, dn logtree.DN, f *os.File, writeLog func(logging.Leveled, ...any)) error {
161 r, w, err := os.Pipe()
162 if err != nil {
163 return fmt.Errorf("creating pipe for %q: %w", dn, err)
164 }
165 defer w.Close()
166 // We don't need to close this pipe since we need it for the entire
167 // process lifetime.
168
169 l := lt.MustLeveledFor(dn)
170 go func() {
171 r := bufio.NewReader(r)
172 for {
173 line, err := r.ReadString('\n')
174 if err != nil {
175 panic(err)
176 }
177
178 writeLog(l, strings.TrimRight(line, "\n"))
179 }
180 }()
181
182 wConn, err := w.SyscallConn()
183 if err != nil {
184 return fmt.Errorf("error getting SyscallConn for %q: %w", dn, err)
185 }
186 fConn, err := f.SyscallConn()
187 if err != nil {
188 return fmt.Errorf("error getting SyscallConn for %q: %w", dn, err)
189 }
190 var wErr, fErr error
191 wErr = wConn.Control(func(wFd uintptr) {
192 fErr = fConn.Control(func(fFd uintptr) {
193 err = syscall.Dup2(int(wFd), int(fFd))
194 })
195 })
196
197 err = errors.Join(wErr, fErr, err)
198 if err != nil {
199 return fmt.Errorf("failed to duplicate file descriptor %q: %w", dn, err)
200 }
201
202 return nil
203}
204
205func mkdirAndMount(dir, fs string, flags uintptr, data string) error {
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200206 if err := os.MkdirAll(dir, 0o755); err != nil {
207 return fmt.Errorf("could not make %s: %w", dir, err)
208 }
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100209 if err := unix.Mount(fs, dir, fs, flags, data); err != nil {
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200210 return fmt.Errorf("could not mount %s on %s: %w", fs, dir, err)
211 }
212 return nil
213}
214
215// setupMounts sets up basic mounts like sysfs, procfs, devtmpfs and cgroups.
216// This should be called early during init as a lot of processes depend on this
217// being available.
218func setupMounts() (err error) {
219 // Set up target filesystems.
220 for _, el := range []struct {
221 dir string
222 fs string
223 flags uintptr
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100224 data string
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200225 }{
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100226 {"/sys", "sysfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV, ""},
227 {"/sys/kernel/tracing", "tracefs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV, ""},
228 {"/sys/fs/pstore", "pstore", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV, ""},
229 {"/proc", "proc", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV, ""},
230 {"/dev", "devtmpfs", unix.MS_NOEXEC | unix.MS_NOSUID, ""},
231 {"/dev/pts", "devpts", unix.MS_NOEXEC | unix.MS_NOSUID, ""},
232 // Nothing currently uses /dev/shm, but it's required
233 // by containerd when the host IPC namespace is shared, which
234 // is required by "kubectl debug node/" and specific customer applications.
235 // https://github.com/monogon-dev/monogon/issues/305.
236 {"/dev/shm", "tmpfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV, ""},
237 {"/sys/fs/cgroup", "cgroup2", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV, "nsdelegate,memory_recursiveprot"},
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200238 } {
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100239 err = multierr.Append(err, mkdirAndMount(el.dir, el.fs, el.flags, el.data))
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200240 }
241
242 // We try to mount efivarfs but ignore any error,
243 // as we don't want to crash on non-EFI systems.
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100244 _ = mkdirAndMount(efivarfs.Path, "efivarfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, "")
245
246 // Create main cgroup "everything" and move ourselves into it.
247 err = multierr.Append(err, os.Mkdir("/sys/fs/cgroup/everything", 0755))
248 err = multierr.Append(err, cgroups.WriteCgroupProc("/sys/fs/cgroup/everything", os.Getpid()))
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200249 return
250}
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100251
252type console struct {
253 path string
254 maxWidth int
255 reader *logtree.LogReader
256}
257
258func collectConsoles() []*console {
259 const defaultMaxWidth = 120
260
261 // Add the two standard tty0/ttyS0 consoles
262 consoles := map[string]int{
263 "tty0": defaultMaxWidth,
264 "ttyS0": defaultMaxWidth,
265 }
266
267 // Parse consoles from the kernel command line.
268 cmdline, err := os.ReadFile("/proc/cmdline")
269 if err == nil {
270 params, _, err := bootparam.Unmarshal(string(cmdline))
271 if err == nil {
272 for v := range params.Consoles() {
273 consoles[v] = defaultMaxWidth
274 }
275 }
276 }
277
278 var serialConsoles []*console
279 for consolePath, maxWidth := range consoles {
280 serialConsoles = append(serialConsoles, &console{
281 path: "/dev/" + consolePath,
282 maxWidth: maxWidth,
283 })
284 }
285
286 return serialConsoles
287}
288
289func setupConsoles(lt *logtree.LogTree, serialConsoles []*console, ltc ConsoleConfig) error {
290 filterFn := ltc.Filter
291 if filterFn == nil {
292 filterFn = func(*logtree.LogEntry) bool {
293 return true
294 }
295 }
296
297 // Open up consoles and set up logging from logtree and crash channel.
298 for _, c := range serialConsoles {
299 f, err := os.OpenFile(c.path, os.O_WRONLY, 0)
300 if err != nil {
301 continue
302 }
303
304 reader, err := lt.Read("", logtree.WithChildren(), logtree.WithStream())
305 if err != nil {
306 return fmt.Errorf("could not set up root log reader: %w", err)
307 }
308 c.reader = reader
309
310 go func() {
311 fmt.Fprintf(f, "This is %s. Verbose node logs follow.\n\n", f.Name())
312 for p := range reader.Stream {
313 if filterFn(p) {
314 fmt.Fprintf(f, "%s\n", p.ConciseString(ltc.ShortenDictionary, c.maxWidth))
315 }
316 }
317 }()
318 }
319
320 return nil
321}