blob: 8fcb8fec3b44dec283af08dd1f16a81d12d4db7d [file] [log] [blame]
// Copyright The Monogon Project Authors.
// SPDX-License-Identifier: Apache-2.0
3
// Package bringup implements a simple wrapper which configures all default
// mounts, logging and the corresponding forwarders to tty0 and ttyS0. It
// then configures a new logtree and starts a supervisor to run the provided
// supervisor.Runnable. Said Runnable is expected to return no error. If it
// does, the supervisor will exit, an error will be printed and the system will
// reboot after five seconds.
10package bringup
11
12import (
Tim Windelschmidt83da4222024-12-16 02:49:50 +010013 "bufio"
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020014 "context"
Tim Windelschmidt83da4222024-12-16 02:49:50 +010015 "errors"
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020016 "fmt"
17 "os"
Tim Windelschmidt83da4222024-12-16 02:49:50 +010018 "runtime/debug"
19 "strings"
20 "sync/atomic"
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020021 "time"
22
Tim Windelschmidt83da4222024-12-16 02:49:50 +010023 "github.com/opencontainers/runc/libcontainer/cgroups"
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020024 "go.uber.org/multierr"
25 "golang.org/x/sys/unix"
26
Tim Windelschmidt83da4222024-12-16 02:49:50 +010027 "source.monogon.dev/go/logging"
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020028 "source.monogon.dev/osbase/bootparam"
29 "source.monogon.dev/osbase/efivarfs"
30 "source.monogon.dev/osbase/logtree"
31 "source.monogon.dev/osbase/supervisor"
32)
33
Tim Windelschmidt83da4222024-12-16 02:49:50 +010034type Config struct {
35 Console ConsoleConfig
36 Supervisor SupervisorConfig
37}
38
39type ConsoleConfig struct {
40 ShortenDictionary logtree.ShortenDictionary
41
42 // Filter is used to filter out some uselessly verbose logs from the
43 // console. It should return true if an entry is allowed to be printed.
44 Filter func(*logtree.LogEntry) bool
45}
46
47type SupervisorConfig struct {
48 Metrics []supervisor.Metrics
49}
50
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020051type Runnable supervisor.Runnable
52
53func (r Runnable) Run() {
Tim Windelschmidt83da4222024-12-16 02:49:50 +010054 r.RunWith(Config{})
55}
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020056
Tim Windelschmidt83da4222024-12-16 02:49:50 +010057func (r Runnable) RunWith(cfg Config) {
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020058 if err := setupMounts(); err != nil {
59 // We cannot do anything if we fail to mount.
60 panic(err)
61 }
62
Tim Windelschmidt83da4222024-12-16 02:49:50 +010063 // Root system logtree.
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020064 lt := logtree.New()
Tim Windelschmidt83da4222024-12-16 02:49:50 +010065
66 // Collect serial consoles from cmdline and defaults.
67 serialConsoles := collectConsoles()
68
69 // Setup console writers
70 if err := setupConsoles(lt, serialConsoles, cfg.Console); err != nil {
71 panic(err)
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020072 }
73
Tim Windelschmidt7dac92b2024-12-16 02:51:04 +010074 // Initialize persistent panic handler
75 initPanicHandler(lt, serialConsoles)
76
Tim Windelschmidt83da4222024-12-16 02:49:50 +010077 // Rewire os.Stdout and os.Stderr to logtree which then is printed
78 // to serial consoles.
79 if err := rewireStdIO(lt); err != nil {
80 panic(err)
81 }
82
Tim Windelschmidt83da4222024-12-16 02:49:50 +010083 // Initial logger. Used until we get to a supervisor.
84 logger := lt.MustLeveledFor("init")
85
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020086 sCtx, cancel := context.WithCancelCause(context.Background())
87
Tim Windelschmidt83da4222024-12-16 02:49:50 +010088 supervisorOptions := []supervisor.SupervisorOpt{
89 supervisor.WithExistingLogtree(lt),
90 }
91
92 for _, m := range cfg.Supervisor.Metrics {
93 supervisorOptions = append(supervisorOptions, supervisor.WithMetrics(m))
94 }
95
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020096 // Don't reschedule the root runnable...
Tim Windelschmidt83da4222024-12-16 02:49:50 +010097 var started atomic.Bool
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020098 supervisor.New(sCtx, func(ctx context.Context) (err error) {
99 defer func() {
100 if r := recover(); r != nil {
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100101 err = fmt.Errorf("root runnable paniced: \n%s:\n%s", r, debug.Stack())
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200102 cancel(err)
103 }
104 }()
105
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100106 if started.Swap(true) {
107 err = fmt.Errorf("root runnable restarted")
108 cancel(err)
109 return
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200110 }
111
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100112 if err := supervisor.Run(ctx, "pstore", dumpAndCleanPstore); err != nil {
113 return fmt.Errorf("when starting pstore: %w", err)
114 }
115
116 err = r(ctx)
117 if err != nil {
118 cancel(err)
119 return
120 }
121
122 return
123 }, supervisorOptions...)
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200124
125 <-sCtx.Done()
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100126
127 time.Sleep(time.Second)
128
129 // Write final messages on panic to stderr.
130 logger.Errorf("Fatal error: %+v", context.Cause(sCtx))
131 logger.Error("This node could not be started. Rebooting...")
132 time.Sleep(time.Second)
133
134 // After a bit, kill all console log readers.
135 for _, c := range serialConsoles {
136 if c.reader == nil {
137 continue
138 }
139 c.reader.Close()
140 c.reader.Stream = nil
141 }
142
143 // Wait for final logs to flush to console...
144 time.Sleep(5 * time.Second)
145 unix.Sync()
146 unix.Reboot(unix.LINUX_REBOOT_CMD_RESTART)
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200147}
148
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100149func rewireStdIO(lt *logtree.LogTree) error {
150 if err := rewireFD(lt, "stderr", os.Stderr, logging.Leveled.Error); err != nil {
151 return fmt.Errorf("failed rewiring stderr: %w", err)
152 }
153 if err := rewireFD(lt, "stdout", os.Stdout, logging.Leveled.Info); err != nil {
154 return fmt.Errorf("failed rewiring stdout: %w", err)
155 }
156 return nil
157}
158
159func rewireFD(lt *logtree.LogTree, dn logtree.DN, f *os.File, writeLog func(logging.Leveled, ...any)) error {
160 r, w, err := os.Pipe()
161 if err != nil {
162 return fmt.Errorf("creating pipe for %q: %w", dn, err)
163 }
164 defer w.Close()
165 // We don't need to close this pipe since we need it for the entire
166 // process lifetime.
167
168 l := lt.MustLeveledFor(dn)
169 go func() {
170 r := bufio.NewReader(r)
171 for {
172 line, err := r.ReadString('\n')
173 if err != nil {
174 panic(err)
175 }
176
177 writeLog(l, strings.TrimRight(line, "\n"))
178 }
179 }()
180
181 wConn, err := w.SyscallConn()
182 if err != nil {
183 return fmt.Errorf("error getting SyscallConn for %q: %w", dn, err)
184 }
185 fConn, err := f.SyscallConn()
186 if err != nil {
187 return fmt.Errorf("error getting SyscallConn for %q: %w", dn, err)
188 }
189 var wErr, fErr error
190 wErr = wConn.Control(func(wFd uintptr) {
191 fErr = fConn.Control(func(fFd uintptr) {
Jan Schär51c67482025-05-05 13:11:55 +0000192 err = unix.Dup2(int(wFd), int(fFd))
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100193 })
194 })
195
196 err = errors.Join(wErr, fErr, err)
197 if err != nil {
198 return fmt.Errorf("failed to duplicate file descriptor %q: %w", dn, err)
199 }
200
201 return nil
202}
203
204func mkdirAndMount(dir, fs string, flags uintptr, data string) error {
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200205 if err := os.MkdirAll(dir, 0o755); err != nil {
206 return fmt.Errorf("could not make %s: %w", dir, err)
207 }
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100208 if err := unix.Mount(fs, dir, fs, flags, data); err != nil {
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200209 return fmt.Errorf("could not mount %s on %s: %w", fs, dir, err)
210 }
211 return nil
212}
213
214// setupMounts sets up basic mounts like sysfs, procfs, devtmpfs and cgroups.
215// This should be called early during init as a lot of processes depend on this
216// being available.
217func setupMounts() (err error) {
218 // Set up target filesystems.
219 for _, el := range []struct {
220 dir string
221 fs string
222 flags uintptr
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100223 data string
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200224 }{
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100225 {"/sys", "sysfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV, ""},
226 {"/sys/kernel/tracing", "tracefs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV, ""},
227 {"/sys/fs/pstore", "pstore", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV, ""},
228 {"/proc", "proc", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV, ""},
229 {"/dev", "devtmpfs", unix.MS_NOEXEC | unix.MS_NOSUID, ""},
230 {"/dev/pts", "devpts", unix.MS_NOEXEC | unix.MS_NOSUID, ""},
231 // Nothing currently uses /dev/shm, but it's required
232 // by containerd when the host IPC namespace is shared, which
233 // is required by "kubectl debug node/" and specific customer applications.
234 // https://github.com/monogon-dev/monogon/issues/305.
235 {"/dev/shm", "tmpfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV, ""},
236 {"/sys/fs/cgroup", "cgroup2", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV, "nsdelegate,memory_recursiveprot"},
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200237 } {
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100238 err = multierr.Append(err, mkdirAndMount(el.dir, el.fs, el.flags, el.data))
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200239 }
240
241 // We try to mount efivarfs but ignore any error,
242 // as we don't want to crash on non-EFI systems.
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100243 _ = mkdirAndMount(efivarfs.Path, "efivarfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, "")
244
245 // Create main cgroup "everything" and move ourselves into it.
246 err = multierr.Append(err, os.Mkdir("/sys/fs/cgroup/everything", 0755))
247 err = multierr.Append(err, cgroups.WriteCgroupProc("/sys/fs/cgroup/everything", os.Getpid()))
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200248 return
249}
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100250
251type console struct {
252 path string
253 maxWidth int
254 reader *logtree.LogReader
255}
256
257func collectConsoles() []*console {
258 const defaultMaxWidth = 120
259
260 // Add the two standard tty0/ttyS0 consoles
261 consoles := map[string]int{
262 "tty0": defaultMaxWidth,
263 "ttyS0": defaultMaxWidth,
264 }
265
266 // Parse consoles from the kernel command line.
267 cmdline, err := os.ReadFile("/proc/cmdline")
268 if err == nil {
269 params, _, err := bootparam.Unmarshal(string(cmdline))
270 if err == nil {
271 for v := range params.Consoles() {
272 consoles[v] = defaultMaxWidth
273 }
274 }
275 }
276
277 var serialConsoles []*console
278 for consolePath, maxWidth := range consoles {
279 serialConsoles = append(serialConsoles, &console{
280 path: "/dev/" + consolePath,
281 maxWidth: maxWidth,
282 })
283 }
284
285 return serialConsoles
286}
287
288func setupConsoles(lt *logtree.LogTree, serialConsoles []*console, ltc ConsoleConfig) error {
289 filterFn := ltc.Filter
290 if filterFn == nil {
291 filterFn = func(*logtree.LogEntry) bool {
292 return true
293 }
294 }
295
296 // Open up consoles and set up logging from logtree and crash channel.
297 for _, c := range serialConsoles {
298 f, err := os.OpenFile(c.path, os.O_WRONLY, 0)
299 if err != nil {
300 continue
301 }
302
303 reader, err := lt.Read("", logtree.WithChildren(), logtree.WithStream())
304 if err != nil {
305 return fmt.Errorf("could not set up root log reader: %w", err)
306 }
307 c.reader = reader
308
309 go func() {
310 fmt.Fprintf(f, "This is %s. Verbose node logs follow.\n\n", f.Name())
311 for p := range reader.Stream {
312 if filterFn(p) {
313 fmt.Fprintf(f, "%s\n", p.ConciseString(ltc.ShortenDictionary, c.maxWidth))
314 }
315 }
316 }()
317 }
318
319 return nil
320}