blob: d92f7edd8179406346bd0bab7f13b02a760f7c58 [file] [log] [blame]
// minit is a barebones Linux-compatible init (PID 1) process.
//
// Its goal is to run the Metropolis core executable and reap any children that
// it stumbles upon. It does not support running under a TTY and is not
// configurable in any way.
//
// The only reason this exists is because Go's child process reaping (when
// using os/exec.Command) races any PID 1 process reaping, thereby preventing
// running a complex Go binary as PID 1. In the future this might be rewritten
// in a memory-safe language like Zig or Rust, but this implementation will do
// for now, as long as it keeps having basically zero attack surface.
//
// This code has been vaguely inspired by github.com/Yelp/dumb-init and
// github.com/krallin/tini, two already existing minimal init implementations.
// These, however, attempt to handle being run in a TTY and some
// configurability, as they're meant to be run in containers. We don't need any
// of that, and we'd rather have as little C as possible.
18
19#include <errno.h>
20#include <linux/reboot.h>
21#include <signal.h>
Serge Bazanski83a28c92022-04-19 13:59:38 +020022#include <stdarg.h>
Serge Bazanskieac8f732021-10-05 23:30:37 +020023#include <stdio.h>
24#include <stdlib.h>
25#include <string.h>
26#include <sys/reboot.h>
27#include <sys/wait.h>
28#include <unistd.h>
29
// Forward declaration; handle_signal is defined after main, which calls it.
void handle_signal(pid_t child_pid, int signum);

// NUM_CONSOLES is the number of console devices that log messages are written
// to, and therefore the length of the consoles array.
#define NUM_CONSOLES 4

// consoles holds one output stream per console device. Every entry starts out
// as NULL; a NULL entry means that console is not available and must be
// skipped. An explicit {NULL} initializer is used because empty braces are not
// valid ISO C before C23.
FILE *consoles[NUM_CONSOLES] = {NULL};
34
35// open_consoles populates the consoles array with FILE pointers to opened
36// character devices that should receive log messages. Some of these pointers
37// are likely to be null, meaning that particular console is not available.
38void open_consoles() {
39 consoles[0] = fopen("/dev/console", "w");
40 consoles[1] = fopen("/dev/tty0", "w");
41 consoles[2] = fopen("/dev/ttyS0", "w");
Lorenz Brunf0b22ff2023-05-02 16:04:20 +020042 consoles[3] = fopen("/dev/ttyS1", "w");
Serge Bazanski83a28c92022-04-19 13:59:38 +020043
44 // Set all open consoles to be line-buffered.
45 for (int i = 0; i < NUM_CONSOLES; i++) {
46 if (consoles[i] == NULL) {
47 continue;
48 }
49 setvbuf(consoles[i], NULL, _IOLBF, BUFSIZ);
50 }
51
52 // TODO(q3k): disable hardware and software flow control on TTYs. This
53 // shouldn't be necessary on our current platform, but should be ensured
54 // regardless, to make sure we never block writing to any console.
55}
56
57// cprintf emits a format string to all opened consoles.
58void cprintf(const char *fmt, ...) {
59 va_list args;
60 va_start(args, fmt);
61
62 for (int i = 0; i < NUM_CONSOLES; i++) {
63 FILE *console = consoles[i];
64 if (console == NULL) {
65 continue;
66 }
67 vfprintf(console, fmt, args);
68 }
69
70 va_end(args);
71}
72
Serge Bazanskieac8f732021-10-05 23:30:37 +020073int main() {
74 // Block all signals. We'll unblock them in the child.
75 sigset_t all_signals;
76 sigfillset(&all_signals);
77 sigprocmask(SIG_BLOCK, &all_signals, NULL);
78
Serge Bazanski83a28c92022-04-19 13:59:38 +020079 open_consoles();
80
Serge Bazanskieac8f732021-10-05 23:30:37 +020081 // Say hello.
Serge Bazanski83a28c92022-04-19 13:59:38 +020082 cprintf(
Serge Bazanskieac8f732021-10-05 23:30:37 +020083 "\n"
84 " Metropolis Cluster Operating System\n"
Serge Bazanski83a28c92022-04-19 13:59:38 +020085 " Copyright 2020-2022 The Monogon Project Authors\n"
Serge Bazanskieac8f732021-10-05 23:30:37 +020086 "\n"
87 );
88
89
90 pid_t pid = fork();
91 if (pid < 0) {
Serge Bazanski83a28c92022-04-19 13:59:38 +020092 cprintf("fork(): %s\n", strerror(errno));
Serge Bazanskieac8f732021-10-05 23:30:37 +020093 return 1;
94 }
95
96 if (pid == 0) {
97 // In the child. Unblock all signals.
98 sigprocmask(SIG_UNBLOCK, &all_signals, NULL);
99 if (setsid() == -1) {
Serge Bazanski83a28c92022-04-19 13:59:38 +0200100 cprintf("setsid: %s\n", strerror(errno));
Serge Bazanskieac8f732021-10-05 23:30:37 +0200101 return 1;
102 }
103
104 // Then, start the core executable.
105 char *argv[] = {
106 "/core",
107 NULL,
108 };
109 execvp(argv[0], argv);
Serge Bazanski83a28c92022-04-19 13:59:38 +0200110 cprintf("execvpe(/core) failed: %s\n", strerror(errno));
Serge Bazanskieac8f732021-10-05 23:30:37 +0200111 return 1;
112 }
113
114 // In the parent. Wait for any signal, then handle it and any other pending
115 // ones.
116 for (;;) {
117 int signum;
118 sigwait(&all_signals, &signum);
119 handle_signal(pid, signum);
120 }
121}
122
// handle_signal is called by the main loop for every signal received. It
// reaps children if SIGCHLD is received, and otherwise dispatches the signal to
// its direct child.
//
// child_pid is the PID of the direct child forked by main, and signum is the
// signal number obtained from sigwait. When the direct child is found to have
// terminated, disks are synced and the machine is rebooted; if the child's
// exit status is non-zero an error report is printed to the consoles first.
void handle_signal(pid_t child_pid, int signum) {
    // Anything other than SIGCHLD should just be forwarded to the child.
    if (signum != SIGCHLD) {
        // A negative PID addresses a process group: the child called setsid()
        // in main, so this targets the group it leads.
        kill(-child_pid, signum);
        return;
    }

    // A SIGCHLD was received. Go through all children and reap them, checking
    // if any of them is our direct child.
    pid_t killed_pid;
    int status;
    while ((killed_pid = waitpid(-1, &status, WNOHANG)) > 0) {
        if (killed_pid != child_pid) {
            // Something else than our direct child died, just reap it.
            continue;
        }

        // Our direct child exited. Translate its status into an exit code.
        int exit_status;
        if (WIFEXITED(status)) {
            // For processes which exited, just use the exit code directly.
            exit_status = WEXITSTATUS(status);
        } else if (WIFSIGNALED(status)) {
            // Otherwise, emulate what sh/bash do and return 128 + the signal
            // number that the child received.
            exit_status = 128 + WTERMSIG(status);
        } else {
            // Something unexpected happened. Attempt to handle this gracefully,
            // but complain.
            cprintf("child status not EXITED nor SIGNALED: %d\n", status);
            exit_status = 1;
        }

        // Direct child exited, let's also exit (by rebooting the machine).
        if (exit_status == 0) {
            // reboot() does not flush filesystem buffers itself, so sync first
            // to avoid losing data on a clean restart.
            sync();
            if (reboot(LINUX_REBOOT_CMD_RESTART) == -1) {
                cprintf("reboot: %s\n", strerror(errno));
            }
            return;
        }

        cprintf("\n Metropolis encountered an uncorrectable error and this node must be restarted.\n");
        cprintf("core exit status: %d\n", exit_status);
        sync();
        cprintf(" Disks synced, rebooting...\n\n");
        if (reboot(LINUX_REBOOT_CMD_RESTART) == -1) {
            cprintf("reboot: %s\n", strerror(errno));
        }
    }
}
175}