blob: 834b023995a8e6d06b43793b026d41a01c89c2b4 [file] [log] [blame]
// minit is a barebones Linux-compatible init (PID 1) process.
//
// Its goal is to run the Metropolis core executable and reap any children that
// it stumbles upon. It does not support running under a TTY and is not
// configurable in any way.
//
// The only reason this exists is because Go's child process reaping (when
// using os/exec.Command) races any PID 1 process reaping, thereby preventing
// running a complex Go binary as PID 1. In the future this might be rewritten
// in a memory-safe language like Zig or Rust, but this implementation will do
// for now, as long as it keeps having basically zero attack surface.
//
// This code has been vaguely inspired by github.com/Yelp/dumb-init and
// github.com/krallin/tini, two already existing minimal init implementations.
// These, however, attempt to handle being run in a TTY and offer some
// configurability, as they're meant to be run in containers. We don't need any
// of that, and we'd rather have as little C as possible.
18
19#include <errno.h>
20#include <linux/reboot.h>
21#include <signal.h>
Serge Bazanski83a28c92022-04-19 13:59:38 +020022#include <stdarg.h>
Serge Bazanskieac8f732021-10-05 23:30:37 +020023#include <stdio.h>
24#include <stdlib.h>
25#include <string.h>
26#include <sys/reboot.h>
27#include <sys/wait.h>
28#include <unistd.h>
29
Jan Schär10670e52025-04-23 12:54:48 +000030#include "metropolis/node/core/minit/stamp.h"
31
Serge Bazanskieac8f732021-10-05 23:30:37 +020032void handle_signal(pid_t child_pid, int signum);
33
// NUM_CONSOLES is the number of console devices that log messages are
// mirrored to.
#define NUM_CONSOLES 3

// consoles holds one stdio stream per console device. A NULL entry means that
// console is not (or not yet) open. The explicit NULL initializer is used
// instead of an empty `{}`, which is only valid from C23 onwards (or as a
// compiler extension); the remaining elements are implicitly zeroed.
FILE *consoles[NUM_CONSOLES] = {NULL};
36
37// open_consoles populates the consoles array with FILE pointers to opened
38// character devices that should receive log messages. Some of these pointers
39// are likely to be null, meaning that particular console is not available.
40void open_consoles() {
Serge Bazanskif538ce42024-09-03 12:17:25 +020041 consoles[0] = fopen("/dev/tty0", "w");
42 consoles[1] = fopen("/dev/ttyS0", "w");
43 consoles[2] = fopen("/dev/ttyS1", "w");
Serge Bazanski83a28c92022-04-19 13:59:38 +020044
45 // Set all open consoles to be line-buffered.
46 for (int i = 0; i < NUM_CONSOLES; i++) {
47 if (consoles[i] == NULL) {
48 continue;
49 }
50 setvbuf(consoles[i], NULL, _IOLBF, BUFSIZ);
51 }
52
53 // TODO(q3k): disable hardware and software flow control on TTYs. This
54 // shouldn't be necessary on our current platform, but should be ensured
55 // regardless, to make sure we never block writing to any console.
56}
57
58// cprintf emits a format string to all opened consoles.
59void cprintf(const char *fmt, ...) {
60 va_list args;
61 va_start(args, fmt);
62
63 for (int i = 0; i < NUM_CONSOLES; i++) {
64 FILE *console = consoles[i];
65 if (console == NULL) {
66 continue;
67 }
68 vfprintf(console, fmt, args);
69 }
70
71 va_end(args);
72}
73
Serge Bazanskieac8f732021-10-05 23:30:37 +020074int main() {
75 // Block all signals. We'll unblock them in the child.
76 sigset_t all_signals;
77 sigfillset(&all_signals);
78 sigprocmask(SIG_BLOCK, &all_signals, NULL);
79
Serge Bazanski83a28c92022-04-19 13:59:38 +020080 open_consoles();
81
Serge Bazanskieac8f732021-10-05 23:30:37 +020082 // Say hello.
Serge Bazanski83a28c92022-04-19 13:59:38 +020083 cprintf(
Serge Bazanskieac8f732021-10-05 23:30:37 +020084 "\n"
85 " Metropolis Cluster Operating System\n"
Jan Schär10670e52025-04-23 12:54:48 +000086 " " COPYRIGHT_LINE "\n"
Serge Bazanskieac8f732021-10-05 23:30:37 +020087 "\n"
88 );
89
90
91 pid_t pid = fork();
92 if (pid < 0) {
Serge Bazanski83a28c92022-04-19 13:59:38 +020093 cprintf("fork(): %s\n", strerror(errno));
Serge Bazanskieac8f732021-10-05 23:30:37 +020094 return 1;
95 }
96
97 if (pid == 0) {
98 // In the child. Unblock all signals.
99 sigprocmask(SIG_UNBLOCK, &all_signals, NULL);
100 if (setsid() == -1) {
Serge Bazanski83a28c92022-04-19 13:59:38 +0200101 cprintf("setsid: %s\n", strerror(errno));
Serge Bazanskieac8f732021-10-05 23:30:37 +0200102 return 1;
103 }
104
105 // Then, start the core executable.
106 char *argv[] = {
107 "/core",
108 NULL,
109 };
110 execvp(argv[0], argv);
Serge Bazanski83a28c92022-04-19 13:59:38 +0200111 cprintf("execvpe(/core) failed: %s\n", strerror(errno));
Serge Bazanskieac8f732021-10-05 23:30:37 +0200112 return 1;
113 }
114
115 // In the parent. Wait for any signal, then handle it and any other pending
116 // ones.
117 for (;;) {
118 int signum;
119 sigwait(&all_signals, &signum);
120 handle_signal(pid, signum);
121 }
122}
123
// restart_node asks the kernel to restart the machine and reports the error
// on the consoles if the request is refused.
static void restart_node(void) {
    if (reboot(LINUX_REBOOT_CMD_RESTART) == -1) {
        cprintf("reboot: %s\n", strerror(errno));
    }
}

// handle_signal is called by the main reap loop for every signal received.
//
// Any signal other than SIGCHLD is forwarded to the process group of the
// direct child. On SIGCHLD, every child that has exited is reaped without
// blocking; if the direct child is among them, its status is translated into
// an exit code, disks are synced and the node is restarted.
//
// child_pid is the PID of the direct child forked by main; signum is the
// signal number that was received.
void handle_signal(pid_t child_pid, int signum) {
    // Anything other than SIGCHLD should just be forwarded to the child. The
    // negative PID addresses the child's whole process group.
    if (signum != SIGCHLD) {
        kill(-child_pid, signum);
        return;
    }

    // A SIGCHLD was received. Go through all children and reap them, checking
    // if any of them is our direct child.
    pid_t reaped_pid;
    int status;
    while ((reaped_pid = waitpid(-1, &status, WNOHANG)) > 0) {
        if (reaped_pid != child_pid) {
            // Something else than our direct child died, just reap it.
            continue;
        }

        // Our direct child exited. Translate its status into an exit code.
        int exit_status;
        if (WIFEXITED(status)) {
            // For processes which exited, just use the exit code directly.
            exit_status = WEXITSTATUS(status);
        } else if (WIFSIGNALED(status)) {
            // Otherwise, emulate what sh/bash do and return 128 + the signal
            // number that the child received.
            exit_status = 128 + WTERMSIG(status);
        } else {
            // Something unexpected happened. Attempt to handle this gracefully,
            // but complain.
            cprintf("child status not EXITED nor SIGNALED: %d\n", status);
            exit_status = 1;
        }

        // Direct child exited, let's also exit.
        if (exit_status == 0) {
            // Clean exit: restart quietly, but still flush dirty data first,
            // as the restart itself does not sync disks.
            sync();
            restart_node();
            return;
        }

        cprintf("\n Metropolis encountered an uncorrectable error and this node must be restarted.\n");
        cprintf("core exit status: %d\n", exit_status);
        sync();
        cprintf(" Disks synced, rebooting...\n\n");
        restart_node();
    }
}