metropolis/node: move misplaced packages out of core

abloader, bios_bootcode and minit don't run as part of the core process,
so it doesn't make sense to have them in //metropolis/node/core.
This changes moves these three to //metropolis/node.

Change-Id: I908efb311a138f07a9f1de8e3c23437ff00131ee
Reviewed-on: https://review.monogon.dev/c/monogon/+/4196
Reviewed-by: Tim Windelschmidt <tim@monogon.tech>
Tested-by: Jenkins CI
diff --git a/metropolis/node/minit/BUILD.bazel b/metropolis/node/minit/BUILD.bazel
new file mode 100644
index 0000000..bff1f69
--- /dev/null
+++ b/metropolis/node/minit/BUILD.bazel
@@ -0,0 +1,21 @@
+load("@aspect_bazel_lib//lib:expand_template.bzl", "expand_template")
+load("@rules_cc//cc:defs.bzl", "cc_binary")
+
+expand_template(
+    name = "stamp",
+    out = "stamp.h",
+    stamp = 1,
+    stamp_substitutions = {"copyright": "{{STABLE_MONOGON_copyright}}"},
+    template = ["#define COPYRIGHT_LINE \"copyright\""],
+)
+
+cc_binary(
+    name = "minit",
+    srcs = [
+        "main.c",
+        ":stamp",
+    ],
+    visibility = [
+        "//metropolis/node:__pkg__",
+    ],
+)
diff --git a/metropolis/node/minit/main.c b/metropolis/node/minit/main.c
new file mode 100644
index 0000000..c2d07c1
--- /dev/null
+++ b/metropolis/node/minit/main.c
@@ -0,0 +1,176 @@
+// minit is a barebones Linux-compatible init (PID 1) process.
+//
+// Its goal is to run the Metropolis core executable and reap any children that
+// it stumbles upon. It does not support running under a TTY and is not
+// configurable in any way.
+//
+// The only reason this exists is because Go's child process reaping (when
+// using os/exec.Command) races any PID 1 process reaping, thereby preventing
+// running a complex Go binary as PID 1. In the future this might be rewritten
+// in a memory-safe language like Zig or Rust, but this implementation will do
+// for now, as long as it keeps having basically zero attack surface.
+//
+// This code has been vaguely inspired by github.com/Yelp/dumb-init and
+// github.com/krallin/tini, two already existing minimal init implementations.
+// These, however, attempt to handle being run in a TTY and some
+// configurability, as they're meant to be run in containers. We don't need any
+// of that, and we'd rather have as little C as possible.
+
+#include <errno.h>
+#include <linux/reboot.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/reboot.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "metropolis/node/minit/stamp.h"
+
+void handle_signal(pid_t child_pid, int signum);
+
+#define NUM_CONSOLES 3
+FILE *consoles[NUM_CONSOLES] = {};
+
+// open_consoles populates the consoles array with FILE pointers to opened
+// character devices that should receive log messages. Some of these pointers
+// are likely to be null, meaning that particular console is not available.
+void open_consoles() {
+    consoles[0] = fopen("/dev/tty0", "w");
+    consoles[1] = fopen("/dev/ttyS0", "w");
+    consoles[2] = fopen("/dev/ttyS1", "w");
+
+    // Set all open consoles to be line-buffered.
+    for (int i = 0; i < NUM_CONSOLES; i++) {
+        if (consoles[i] == NULL) {
+            continue;
+        }
+        setvbuf(consoles[i], NULL, _IOLBF, BUFSIZ);
+    }
+
+    // TODO(q3k): disable hardware and software flow control on TTYs. This
+    // shouldn't be necessary on our current platform, but should be ensured
+    // regardless, to make sure we never block writing to any console.
+}
+
+// cprintf emits a format string to all opened consoles.
+void cprintf(const char *fmt, ...) {
+    va_list args;
+    va_start(args, fmt);
+
+    for (int i = 0; i < NUM_CONSOLES; i++) {
+        FILE *console = consoles[i];
+        if (console == NULL) {
+            continue;
+        }
+        vfprintf(console, fmt, args);
+    }
+
+    va_end(args);
+}
+
+int main() {
+    // Block all signals. We'll unblock them in the child.
+    sigset_t all_signals;
+    sigfillset(&all_signals);
+    sigprocmask(SIG_BLOCK, &all_signals, NULL);
+
+    open_consoles();
+
+    // Say hello.
+    cprintf(
+        "\n"
+        "  Metropolis Cluster Operating System\n"
+        "  " COPYRIGHT_LINE "\n"
+        "\n"
+    );
+
+
+    pid_t pid = fork();
+    if (pid < 0) {
+        cprintf("fork(): %s\n", strerror(errno));
+        return 1;
+    }
+
+    if (pid == 0) {
+        // In the child. Unblock all signals.
+        sigprocmask(SIG_UNBLOCK, &all_signals, NULL);
+        if (setsid() == -1) {
+            cprintf("setsid: %s\n", strerror(errno));
+            return 1;
+        }
+
+        // Then, start the core executable.
+        char *argv[] = {
+            "/core",
+            NULL,
+        };
+        execvp(argv[0], argv);
+        cprintf("execvpe(/core) failed: %s\n", strerror(errno));
+        return 1;
+    }
+
+    // In the parent. Wait for any signal, then handle it and any other pending
+    // ones.
+    for (;;) {
+        int signum;
+        sigwait(&all_signals, &signum);
+        handle_signal(pid, signum);
+    }
+}
+
+// handle_signal is called by the main reap loop for every signal received. It
+// reaps children if SIGCHLD is received, and otherwise dispatches the signal to
+// its direct child.
+void handle_signal(pid_t child_pid, int signum) {
+    // Anything other than SIGCHLD should just be forwarded to the child.
+    if (signum != SIGCHLD) {
+        kill(-child_pid, signum);
+        return;
+    }
+
+    // A SIGCHLD was received. Go through all children and reap them, checking
+    // if any of them is our direct child.
+
+    // exit_status will be set if the direct child process exited.
+    int exit_status = -1;
+
+    pid_t killed_pid;
+    int status;
+    while ((killed_pid = waitpid(-1, &status, WNOHANG)) > 0) {
+        if (killed_pid != child_pid) {
+            // Something else than our direct child died, just reap it.
+            continue;
+        }
+
+        // Our direct child exited. Translate its status into an exit code.
+        if (WIFEXITED(status)) {
+            // For processes which exited, just use the exit code directly.
+            exit_status = WEXITSTATUS(status);
+        } else if (WIFSIGNALED(status)) {
+            // Otherwise, emulate what sh/bash do and return 128 + the signal
+            // number that the child received.
+            exit_status = 128 + WTERMSIG(status);
+        } else {
+            // Something unexpected happened. Attempt to handle this gracefully,
+            // but complain.
+            cprintf("child status not EXITED nor SIGNALED: %d\n", status);
+            exit_status = 1;
+        }
+
+        // Direct child exited, let's also exit.
+        if (exit_status >= 0) {
+            if (exit_status == 0) {
+                reboot(LINUX_REBOOT_CMD_RESTART);
+                return;
+            }
+            cprintf("\n Metropolis encountered an uncorrectable error and this node must be restarted.\n");
+            cprintf("core exit status: %d\n", exit_status);
+            sync();
+            cprintf("  Disks synced, rebooting...\n\n");
+            reboot(LINUX_REBOOT_CMD_RESTART);
+        }
+    }
+}