blob: 5095bfb056c79515832a57609efd2061b2cbcd47 [file] [log] [blame]
// Package bringup implements a simple wrapper which configures all default
// mounts, logging and the corresponding forwarders to tty0 and ttyS0. It
// then configures a new logtree and starts a supervisor to run the provided
// supervisor.Runnable. Said Runnable is expected to return no error. If it
// returns an error or panics, the failure will be printed and the system will
// reboot after a few seconds.
7package bringup
8
9import (
Tim Windelschmidt83da4222024-12-16 02:49:50 +010010 "bufio"
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020011 "context"
Tim Windelschmidt83da4222024-12-16 02:49:50 +010012 "errors"
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020013 "fmt"
14 "os"
Tim Windelschmidt83da4222024-12-16 02:49:50 +010015 "runtime/debug"
16 "strings"
17 "sync/atomic"
18 "syscall"
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020019 "time"
20
Tim Windelschmidt83da4222024-12-16 02:49:50 +010021 "github.com/opencontainers/runc/libcontainer/cgroups"
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020022 "go.uber.org/multierr"
23 "golang.org/x/sys/unix"
24
Tim Windelschmidt83da4222024-12-16 02:49:50 +010025 "source.monogon.dev/go/logging"
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020026 "source.monogon.dev/osbase/bootparam"
27 "source.monogon.dev/osbase/efivarfs"
28 "source.monogon.dev/osbase/logtree"
29 "source.monogon.dev/osbase/supervisor"
30)
31
Tim Windelschmidt83da4222024-12-16 02:49:50 +010032type Config struct {
33 Console ConsoleConfig
34 Supervisor SupervisorConfig
35}
36
37type ConsoleConfig struct {
38 ShortenDictionary logtree.ShortenDictionary
39
40 // Filter is used to filter out some uselessly verbose logs from the
41 // console. It should return true if an entry is allowed to be printed.
42 Filter func(*logtree.LogEntry) bool
43}
44
45type SupervisorConfig struct {
46 Metrics []supervisor.Metrics
47}
48
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020049type Runnable supervisor.Runnable
50
51func (r Runnable) Run() {
Tim Windelschmidt83da4222024-12-16 02:49:50 +010052 r.RunWith(Config{})
53}
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020054
Tim Windelschmidt83da4222024-12-16 02:49:50 +010055func (r Runnable) RunWith(cfg Config) {
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020056 if err := setupMounts(); err != nil {
57 // We cannot do anything if we fail to mount.
58 panic(err)
59 }
60
Tim Windelschmidt83da4222024-12-16 02:49:50 +010061 // Root system logtree.
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020062 lt := logtree.New()
Tim Windelschmidt83da4222024-12-16 02:49:50 +010063
64 // Collect serial consoles from cmdline and defaults.
65 serialConsoles := collectConsoles()
66
67 // Setup console writers
68 if err := setupConsoles(lt, serialConsoles, cfg.Console); err != nil {
69 panic(err)
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020070 }
71
Tim Windelschmidt7dac92b2024-12-16 02:51:04 +010072 // Initialize persistent panic handler
73 initPanicHandler(lt, serialConsoles)
74
Tim Windelschmidt83da4222024-12-16 02:49:50 +010075 // Rewire os.Stdout and os.Stderr to logtree which then is printed
76 // to serial consoles.
77 if err := rewireStdIO(lt); err != nil {
78 panic(err)
79 }
80
Tim Windelschmidt83da4222024-12-16 02:49:50 +010081 // Initial logger. Used until we get to a supervisor.
82 logger := lt.MustLeveledFor("init")
83
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020084 sCtx, cancel := context.WithCancelCause(context.Background())
85
Tim Windelschmidt83da4222024-12-16 02:49:50 +010086 supervisorOptions := []supervisor.SupervisorOpt{
87 supervisor.WithExistingLogtree(lt),
88 }
89
90 for _, m := range cfg.Supervisor.Metrics {
91 supervisorOptions = append(supervisorOptions, supervisor.WithMetrics(m))
92 }
93
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020094 // Don't reschedule the root runnable...
Tim Windelschmidt83da4222024-12-16 02:49:50 +010095 var started atomic.Bool
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +020096 supervisor.New(sCtx, func(ctx context.Context) (err error) {
97 defer func() {
98 if r := recover(); r != nil {
Tim Windelschmidt83da4222024-12-16 02:49:50 +010099 err = fmt.Errorf("root runnable paniced: \n%s:\n%s", r, debug.Stack())
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200100 cancel(err)
101 }
102 }()
103
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100104 if started.Swap(true) {
105 err = fmt.Errorf("root runnable restarted")
106 cancel(err)
107 return
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200108 }
109
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100110 if err := supervisor.Run(ctx, "pstore", dumpAndCleanPstore); err != nil {
111 return fmt.Errorf("when starting pstore: %w", err)
112 }
113
114 err = r(ctx)
115 if err != nil {
116 cancel(err)
117 return
118 }
119
120 return
121 }, supervisorOptions...)
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200122
123 <-sCtx.Done()
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100124
125 time.Sleep(time.Second)
126
127 // Write final messages on panic to stderr.
128 logger.Errorf("Fatal error: %+v", context.Cause(sCtx))
129 logger.Error("This node could not be started. Rebooting...")
130 time.Sleep(time.Second)
131
132 // After a bit, kill all console log readers.
133 for _, c := range serialConsoles {
134 if c.reader == nil {
135 continue
136 }
137 c.reader.Close()
138 c.reader.Stream = nil
139 }
140
141 // Wait for final logs to flush to console...
142 time.Sleep(5 * time.Second)
143 unix.Sync()
144 unix.Reboot(unix.LINUX_REBOOT_CMD_RESTART)
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200145}
146
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100147func rewireStdIO(lt *logtree.LogTree) error {
148 if err := rewireFD(lt, "stderr", os.Stderr, logging.Leveled.Error); err != nil {
149 return fmt.Errorf("failed rewiring stderr: %w", err)
150 }
151 if err := rewireFD(lt, "stdout", os.Stdout, logging.Leveled.Info); err != nil {
152 return fmt.Errorf("failed rewiring stdout: %w", err)
153 }
154 return nil
155}
156
157func rewireFD(lt *logtree.LogTree, dn logtree.DN, f *os.File, writeLog func(logging.Leveled, ...any)) error {
158 r, w, err := os.Pipe()
159 if err != nil {
160 return fmt.Errorf("creating pipe for %q: %w", dn, err)
161 }
162 defer w.Close()
163 // We don't need to close this pipe since we need it for the entire
164 // process lifetime.
165
166 l := lt.MustLeveledFor(dn)
167 go func() {
168 r := bufio.NewReader(r)
169 for {
170 line, err := r.ReadString('\n')
171 if err != nil {
172 panic(err)
173 }
174
175 writeLog(l, strings.TrimRight(line, "\n"))
176 }
177 }()
178
179 wConn, err := w.SyscallConn()
180 if err != nil {
181 return fmt.Errorf("error getting SyscallConn for %q: %w", dn, err)
182 }
183 fConn, err := f.SyscallConn()
184 if err != nil {
185 return fmt.Errorf("error getting SyscallConn for %q: %w", dn, err)
186 }
187 var wErr, fErr error
188 wErr = wConn.Control(func(wFd uintptr) {
189 fErr = fConn.Control(func(fFd uintptr) {
190 err = syscall.Dup2(int(wFd), int(fFd))
191 })
192 })
193
194 err = errors.Join(wErr, fErr, err)
195 if err != nil {
196 return fmt.Errorf("failed to duplicate file descriptor %q: %w", dn, err)
197 }
198
199 return nil
200}
201
202func mkdirAndMount(dir, fs string, flags uintptr, data string) error {
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200203 if err := os.MkdirAll(dir, 0o755); err != nil {
204 return fmt.Errorf("could not make %s: %w", dir, err)
205 }
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100206 if err := unix.Mount(fs, dir, fs, flags, data); err != nil {
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200207 return fmt.Errorf("could not mount %s on %s: %w", fs, dir, err)
208 }
209 return nil
210}
211
212// setupMounts sets up basic mounts like sysfs, procfs, devtmpfs and cgroups.
213// This should be called early during init as a lot of processes depend on this
214// being available.
215func setupMounts() (err error) {
216 // Set up target filesystems.
217 for _, el := range []struct {
218 dir string
219 fs string
220 flags uintptr
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100221 data string
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200222 }{
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100223 {"/sys", "sysfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV, ""},
224 {"/sys/kernel/tracing", "tracefs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV, ""},
225 {"/sys/fs/pstore", "pstore", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV, ""},
226 {"/proc", "proc", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV, ""},
227 {"/dev", "devtmpfs", unix.MS_NOEXEC | unix.MS_NOSUID, ""},
228 {"/dev/pts", "devpts", unix.MS_NOEXEC | unix.MS_NOSUID, ""},
229 // Nothing currently uses /dev/shm, but it's required
230 // by containerd when the host IPC namespace is shared, which
231 // is required by "kubectl debug node/" and specific customer applications.
232 // https://github.com/monogon-dev/monogon/issues/305.
233 {"/dev/shm", "tmpfs", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV, ""},
234 {"/sys/fs/cgroup", "cgroup2", unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV, "nsdelegate,memory_recursiveprot"},
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200235 } {
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100236 err = multierr.Append(err, mkdirAndMount(el.dir, el.fs, el.flags, el.data))
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200237 }
238
239 // We try to mount efivarfs but ignore any error,
240 // as we don't want to crash on non-EFI systems.
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100241 _ = mkdirAndMount(efivarfs.Path, "efivarfs", unix.MS_NOEXEC|unix.MS_NOSUID|unix.MS_NODEV, "")
242
243 // Create main cgroup "everything" and move ourselves into it.
244 err = multierr.Append(err, os.Mkdir("/sys/fs/cgroup/everything", 0755))
245 err = multierr.Append(err, cgroups.WriteCgroupProc("/sys/fs/cgroup/everything", os.Getpid()))
Tim Windelschmidt18e9a3f2024-04-08 21:51:03 +0200246 return
247}
Tim Windelschmidt83da4222024-12-16 02:49:50 +0100248
249type console struct {
250 path string
251 maxWidth int
252 reader *logtree.LogReader
253}
254
255func collectConsoles() []*console {
256 const defaultMaxWidth = 120
257
258 // Add the two standard tty0/ttyS0 consoles
259 consoles := map[string]int{
260 "tty0": defaultMaxWidth,
261 "ttyS0": defaultMaxWidth,
262 }
263
264 // Parse consoles from the kernel command line.
265 cmdline, err := os.ReadFile("/proc/cmdline")
266 if err == nil {
267 params, _, err := bootparam.Unmarshal(string(cmdline))
268 if err == nil {
269 for v := range params.Consoles() {
270 consoles[v] = defaultMaxWidth
271 }
272 }
273 }
274
275 var serialConsoles []*console
276 for consolePath, maxWidth := range consoles {
277 serialConsoles = append(serialConsoles, &console{
278 path: "/dev/" + consolePath,
279 maxWidth: maxWidth,
280 })
281 }
282
283 return serialConsoles
284}
285
286func setupConsoles(lt *logtree.LogTree, serialConsoles []*console, ltc ConsoleConfig) error {
287 filterFn := ltc.Filter
288 if filterFn == nil {
289 filterFn = func(*logtree.LogEntry) bool {
290 return true
291 }
292 }
293
294 // Open up consoles and set up logging from logtree and crash channel.
295 for _, c := range serialConsoles {
296 f, err := os.OpenFile(c.path, os.O_WRONLY, 0)
297 if err != nil {
298 continue
299 }
300
301 reader, err := lt.Read("", logtree.WithChildren(), logtree.WithStream())
302 if err != nil {
303 return fmt.Errorf("could not set up root log reader: %w", err)
304 }
305 c.reader = reader
306
307 go func() {
308 fmt.Fprintf(f, "This is %s. Verbose node logs follow.\n\n", f.Name())
309 for p := range reader.Stream {
310 if filterFn(p) {
311 fmt.Fprintf(f, "%s\n", p.ConciseString(ltc.ShortenDictionary, c.maxWidth))
312 }
313 }
314 }()
315 }
316
317 return nil
318}