//build/toolchain/musl-host-gcc: implement

This is a cc_toolchain which runs on x86 systems with Linux/gcc and
targets Smalltown via static musl builds.

It is currently unused, but can be tested by trying to build any
cc_binary with
--crosstool_top=//build/toolchain/musl-host-gcc:musl_host_cc_suite .

Test Plan: This has been tested manually by running it against a simple cc_binary. Another revision on top of this will attempt to build mkfs.xfs with it.

X-Origin-Diff: phab/D623
GitOrigin-RevId: ebdf51ee76d9d5a7fd94725c66ef53783f787df7
diff --git a/build/toolchain/BUILD b/build/toolchain/BUILD
index 5bf53d2..78c4ae6 100644
--- a/build/toolchain/BUILD
+++ b/build/toolchain/BUILD
@@ -2,7 +2,17 @@
 
 # Toolchain definitions.
 #
-# We currently define a single custom toolchain: the host_cc toolchain suite.
+# We currently define two toolchains:
+#
+#  - //build/toolchain:host_cc_suite , which is a fully unhermetic host toolchain,
+#    that can be used to build tools for the host.
+#  - //build/toolchain/musl-host-gcc:musl_host_cc_suite , which combines the host's
+#    gcc compiler with a sysroot tarball that targets Smalltown. This can be used to
+#    build C libraries/tools for Smalltown.
+#
+
+# This file defines //build/toolchain:host_cc_suite.
+#
 # This is a C++ toolchain that uses GCC from the host at hardcoded paths. We
 # can get away with this, as currently the entire build is performed in a known
 # container (see: //scripts:create_container.sh). We define this toolchain so
diff --git a/build/toolchain/cc_toolchain_config.bzl b/build/toolchain/cc_toolchain_config.bzl
index b69e06f..11c7736 100644
--- a/build/toolchain/cc_toolchain_config.bzl
+++ b/build/toolchain/cc_toolchain_config.bzl
@@ -14,17 +14,16 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
 
-load("@bazel_tools//tools/cpp:cc_toolchain_config_lib.bzl", "tool_path")
+load("@bazel_tools//tools/cpp:cc_toolchain_config_lib.bzl", "tool", "tool_path")
 
-# This defines a minimal, non-parametrized toolchain configuration rule that
-# uses the host GCC. For background on why we do this, see
-# //build/toolchain/BUILD.
+# This defines a minimal, barely parametrized toolchain configuration rule that
+# uses the host GCC with some possible overrides.
 
 def _host_cc_toolchain_impl(ctx):
     tool_paths = [
         tool_path(
             name = "gcc",
-            path = "/usr/bin/gcc",
+            path = ctx.attr.gcc,
         ),
         tool_path(
             name = "ld",
@@ -57,10 +56,7 @@
     ]
     return cc_common.create_cc_toolchain_config_info(
         ctx = ctx,
-        cxx_builtin_include_directories = [
-          "/usr/lib/gcc/x86_64-redhat-linux/10/include/",
-          "/usr/include",
-        ],
+        cxx_builtin_include_directories = ctx.attr.host_includes,
         toolchain_identifier = "k8-toolchain",
         host_system_name = "local",
         target_system_name = "local",
@@ -70,10 +66,24 @@
         abi_version = "unknown",
         abi_libc_version = "unknown",
         tool_paths = tool_paths,
+        builtin_sysroot = ctx.attr.sysroot,
     )
 
 host_cc_toolchain_config = rule(
     implementation = _host_cc_toolchain_impl,
-    attrs = {},
+    attrs = {
+        "gcc": attr.string(  # Path used for the "gcc" tool_path entry.
+            default = "/usr/bin/gcc",
+        ),
+        "host_includes": attr.string_list(  # Passed as cxx_builtin_include_directories.
+            default = [
+                "/usr/lib/gcc/x86_64-redhat-linux/10/include/",
+                "/usr/include",
+            ],
+        ),
+        "sysroot": attr.string(  # Passed as builtin_sysroot; empty by default.
+            default = "",
+        ),
+    },
     provides = [CcToolchainConfigInfo],
 )
diff --git a/build/toolchain/musl-host-gcc/BUILD b/build/toolchain/musl-host-gcc/BUILD
new file mode 100644
index 0000000..95a59f6
--- /dev/null
+++ b/build/toolchain/musl-host-gcc/BUILD
@@ -0,0 +1,45 @@
+load("//build/toolchain:cc_toolchain_config.bzl", "host_cc_toolchain_config")
+
+# This file defines //build/toolchain/musl-host-gcc:musl_host_cc_suite.
+#
+# This is a C++ toolchain that uses GCC from the host at hardcoded paths, with
+# a pre-built sysroot tarball that targets Smalltown with musl and Linux headers.
+# It's a superset of //build/toolchain:host_cc_suite.
+# For more information, see README.md.
+
+cc_toolchain_suite(
+    name = "musl_host_cc_suite",
+    toolchains = {
+        "k8": ":musl_host_cc_k8_toolchain",  # The only CPU entry defined for this suite.
+    },
+    visibility = ["//visibility:public"],
+)
+
+cc_toolchain(
+    name = "musl_host_cc_k8_toolchain",
+    all_files = ":musl_toolchain_files",  # Every *_files attribute below reuses this one filegroup.
+    compiler_files = ":musl_toolchain_files",
+    dwp_files = ":musl_toolchain_files",
+    linker_files = ":musl_toolchain_files",
+    objcopy_files = ":musl_toolchain_files",
+    strip_files = ":musl_toolchain_files",
+    supports_param_files = 0,
+    toolchain_config = ":musl_host_cc_k8_toolchain_config",
+    toolchain_identifier = "host-musl-k8-toolchain",
+)
+
+host_cc_toolchain_config(
+    name = "musl_host_cc_k8_toolchain_config",
+    gcc = "gcc-wrapper.sh",  # Wrapper script in this package; it execs /usr/bin/gcc with -specs musl.spec.
+    host_includes = [],  # Replaces the rule's default host include directories with an empty list.
+    sysroot = "external/musl_sysroot",  # Presumably where @musl_sysroot lands relative to the execroot - TODO confirm.
+)
+
+filegroup(
+    name = "musl_toolchain_files",
+    srcs = [
+        ":gcc-wrapper.sh",  # gcc entrypoint named by the toolchain config.
+        ":musl.spec",  # GCC spec file that gcc-wrapper.sh passes via -specs.
+        "@musl_sysroot//:all",  # include/** and lib/** from the extracted sysroot tarball.
+    ],
+)
diff --git a/build/toolchain/musl-host-gcc/README.md b/build/toolchain/musl-host-gcc/README.md
new file mode 100644
index 0000000..585bac2
--- /dev/null
+++ b/build/toolchain/musl-host-gcc/README.md
@@ -0,0 +1,42 @@
+musl-host-gcc
+=============
+
+musl-host-gcc is a Bazel C++ toolchain that uses the machine's host gcc in combination with a pre-built musl, musl headers, and Linux headers.
+
+It is currently used to build the few C binaries we need in Smalltown's runtime.
+
+At some point, this toolchain should be replaced by a fully hermetic toolchain that doesn't depend on the host environment.
+
+Usage
+-----
+
+To use this toolchain explicitly while building a `cc_binary`, do:
+
+    bazel build --crosstool_top=//build/toolchain/musl-host-gcc:musl_host_cc_suite //foo/bar
+
+During an actual build however, the right toolchain should be selected using aspects or other Bazel configurability features, instead of a hardcoded `--crosstool_top`.
+
+Building Toolchain Sysroot Tarball
+----------------------------------
+
+The toolchain's musl/linux components are currently built ahead of time and committed to this repository as `//build/toolchain/musl-host-gcc/sysroot.tar.xz`. This is the 'sysroot' tarball, which contains all headers and libraries required to build against Smalltown.
+
+To build this tarball, run the following commands:
+
+    bazel build //build/toolchain/musl-host-gcc/sysroot
+    cp -f bazel-bin/build/toolchain/musl-host-gcc/sysroot/sysroot.tar.xz build/toolchain/musl-host-gcc/sysroot.tar.xz
+
+Internals
+---------
+
+The toolchain is implemented in the following way:
+
+1. `//build/toolchain/musl-host-gcc/sysroot` is used to build `//build/toolchain/musl-host-gcc/sysroot.tar.xz`, which is a tarball that contains all include and binary library files for building against musl for Smalltown (x86\_64 / k8) - these are musl headers, musl libraries, and linux headers. This tarball is committed to source control.
+1. When building a target that uses the toolchain, the `sysroot.tar.xz` tarball is extracted into an external repository `@musl_sysroot`, via `sysroot.bzl` and `sysroot_repository.bzl`.
+1. A toolchain config is built using `//build/toolchain:cc_toolchain_config.bzl`, which points at `gcc-wrapper.sh` as its gcc entrypoint. `gcc-wrapper.sh` expects to be able to call the host gcc with `musl.spec`.
+1. A toolchain is built in `//build/toolchain/musl-host-gcc:musl_host_cc_suite`, which uses the previously mentioned config and bundles `gcc-wrapper.sh`, `musl.spec`, and the files extracted from the sysroot tarball.
+
+Quirks
+------
+
+As mentioned above, the musl sysroot is kept in a tarball in this repository. This is obviously suboptimal, but on the other hand gives us an effectively pre-built part of a toolchain. In the future, once we have a hermetic toolchain, a similar tarball might actually contain a fully hermetic toolchain pre-built for k8.
diff --git a/build/toolchain/musl-host-gcc/gcc-wrapper.sh b/build/toolchain/musl-host-gcc/gcc-wrapper.sh
new file mode 100755
index 0000000..a430e75
--- /dev/null
+++ b/build/toolchain/musl-host-gcc/gcc-wrapper.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+exec /usr/bin/gcc "$@" -specs build/toolchain/musl-host-gcc/musl.spec  # spec path is relative to the working directory
diff --git a/build/toolchain/musl-host-gcc/musl.spec b/build/toolchain/musl-host-gcc/musl.spec
new file mode 100644
index 0000000..376d0d9
--- /dev/null
+++ b/build/toolchain/musl-host-gcc/musl.spec
@@ -0,0 +1,32 @@
+%rename cpp_options old_cpp_options
+
+*cpp_options:
+-nostdinc %(old_cpp_options) -isystem external/musl_sysroot/include
+
+*cc1:
+%(cc1_cpu) -nostdinc -isystem external/musl_sysroot/include
+
+*link_libgcc:
+-L .%s -L external/musl_sysroot/lib
+
+*libgcc:
+libgcc.a%s %:if-exists(libgcc_eh.a%s)
+
+*startfile:
+%{!shared: external/musl_sysroot/lib/Scrt1.o} external/musl_sysroot/lib/crti.o crtbeginS.o%s
+
+*endfile:
+crtendS.o%s external/musl_sysroot/lib/crtn.o
+
+*link:
+-nostdlib -no-dynamic-linker -static %{rdynamic:-export-dynamic}
+
+*esp_link:
+
+
+*esp_options:
+
+
+*esp_cpp_options:
+
+
diff --git a/build/toolchain/musl-host-gcc/sysroot.bzl b/build/toolchain/musl-host-gcc/sysroot.bzl
new file mode 100644
index 0000000..2f54ced
--- /dev/null
+++ b/build/toolchain/musl-host-gcc/sysroot.bzl
@@ -0,0 +1,26 @@
+#  Copyright 2020 The Monogon Project Authors.
+#
+#  SPDX-License-Identifier: Apache-2.0
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+load("//build/toolchain/musl-host-gcc:sysroot_repository.bzl", "musl_sysroot_rule")
+
+def musl_sysroot_repositories():
+    """
+    Declares the @musl_sysroot external repository from the sysroot.tar.xz snapshot in this package.
+    """
+    musl_sysroot_rule(
+        name = "musl_sysroot",
+        snapshot = "//build/toolchain/musl-host-gcc:sysroot.tar.xz",
+    )
diff --git a/build/toolchain/musl-host-gcc/sysroot.tar.xz b/build/toolchain/musl-host-gcc/sysroot.tar.xz
new file mode 100755
index 0000000..e61dba6
--- /dev/null
+++ b/build/toolchain/musl-host-gcc/sysroot.tar.xz
Binary files differ
diff --git a/build/toolchain/musl-host-gcc/sysroot/BUILD b/build/toolchain/musl-host-gcc/sysroot/BUILD
new file mode 100644
index 0000000..62260ae
--- /dev/null
+++ b/build/toolchain/musl-host-gcc/sysroot/BUILD
@@ -0,0 +1,24 @@
+load(":musl.bzl", "musl_headers")
+load(":linux.bzl", "linux_headers")
+load(":tarball.bzl", "musl_gcc_tarball")
+
+linux_headers(
+    name = "linux_headers",
+    src = "@linux//:all",  # Presumably the Linux kernel source tree - TODO confirm against WORKSPACE.
+    arch = "x86_64",  # Forwarded to make as ARCH= by the rule implementation.
+    visibility = ["//visibility:public"],
+)
+
+musl_headers(
+    name = "musl_headers",
+    src = "@musl//:all",  # Presumably the musl source tree - TODO confirm against WORKSPACE.
+    arch = "x86_64",  # Forwarded to make as ARCH= by the rule implementation.
+    visibility = ["//visibility:public"],
+)
+
+musl_gcc_tarball(
+    name = "sysroot",  # The rule emits <name>.tar.xz, i.e. sysroot.tar.xz.
+    musl = "//third_party/musl",  # The detected root of this target is archived under lib/.
+    musl_headers = ":musl_headers",  # Archived under include/.
+    linux_headers = ":linux_headers",  # Archived under include/.
+)
diff --git a/build/toolchain/musl-host-gcc/sysroot/linux.bzl b/build/toolchain/musl-host-gcc/sysroot/linux.bzl
new file mode 100644
index 0000000..e9cf40a
--- /dev/null
+++ b/build/toolchain/musl-host-gcc/sysroot/linux.bzl
@@ -0,0 +1,44 @@
+#  Copyright 2020 The Monogon Project Authors.
+#
+#  SPDX-License-Identifier: Apache-2.0
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+load(
+    "//build/utils:detect_root.bzl",
+    "detect_root",
+)
+
+def _linux_headers(ctx):
+    """Runs `make headers_install` in the root detected from `src`; returns the headers directory."""
+    out_dir = ctx.actions.declare_directory(ctx.attr.name + "_headers")
+    src_root = detect_root(ctx.attr.src)
+    ctx.actions.run_shell(
+        mnemonic = "LinuxCollectHeaders",
+        progress_message = "Generating Linux Kernel Headers",
+        inputs = ctx.files.src,
+        outputs = [out_dir],
+        use_default_shell_env = True,
+        arguments = [src_root, ctx.attr.arch, out_dir.path],
+        # Installs into <out>/include, then hoists the contents up and removes the empty include/ dir.
+        command = "make -C \"$1\" headers_install ARCH=\"$2\" INSTALL_HDR_PATH=\"$(pwd)/$3\" > /dev/null && mv \"$3/include/\"* \"$3/\" && rmdir \"$3/include\"",
+    )
+    return [DefaultInfo(files = depset([out_dir]))]
+
+linux_headers = rule(
+    implementation = _linux_headers,
+    attrs = {
+        "src": attr.label(mandatory = True),  # Its files are the action inputs; its detected root is passed to make -C.
+        "arch": attr.string(mandatory = True),  # Forwarded to make as ARCH=.
+    },
+)
diff --git a/build/toolchain/musl-host-gcc/sysroot/musl.bzl b/build/toolchain/musl-host-gcc/sysroot/musl.bzl
new file mode 100644
index 0000000..5055b83
--- /dev/null
+++ b/build/toolchain/musl-host-gcc/sysroot/musl.bzl
@@ -0,0 +1,44 @@
+#  Copyright 2020 The Monogon Project Authors.
+#
+#  SPDX-License-Identifier: Apache-2.0
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+load(
+    "//build/utils:detect_root.bzl",
+    "detect_root",
+)
+
+def _musl_headers(ctx):
+    """Runs `make install-headers` in the root detected from `src`; returns the headers directory."""
+    out_dir = ctx.actions.declare_directory(ctx.attr.name + "_headers")
+    src_root = detect_root(ctx.attr.src)
+    ctx.actions.run_shell(
+        mnemonic = "MuslCollectHeaders",
+        progress_message = "Collecting musl headers",
+        inputs = ctx.files.src,
+        outputs = [out_dir],
+        use_default_shell_env = True,
+        arguments = [src_root, ctx.attr.arch, out_dir.path],
+        # `$(pwd)` makes the output path absolute, since `make -C` runs from another directory.
+        command = "make -C \"$1\" install-headers ARCH=\"$2\" includedir=\"$(pwd)/$3\" > /dev/null",
+    )
+    return [DefaultInfo(files = depset([out_dir]))]
+
+musl_headers = rule(
+    implementation = _musl_headers,
+    attrs = {
+        "src": attr.label(mandatory = True),  # Its files are the action inputs; its detected root is passed to make -C.
+        "arch": attr.string(mandatory = True),  # Forwarded to make as ARCH=.
+    },
+)
diff --git a/build/toolchain/musl-host-gcc/sysroot/tarball.bzl b/build/toolchain/musl-host-gcc/sysroot/tarball.bzl
new file mode 100644
index 0000000..4f12049
--- /dev/null
+++ b/build/toolchain/musl-host-gcc/sysroot/tarball.bzl
@@ -0,0 +1,76 @@
+#  Copyright 2020 The Monogon Project Authors.
+#
+#  SPDX-License-Identifier: Apache-2.0
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+load(
+    "//build/utils:detect_root.bzl",
+    "detect_root",
+)
+
+"""
+Build a sysroot-style tarball containing musl/linux headers and libraries.
+
+This can then be used to build a C toolchain that builds for Smalltown.
+"""
+
+def _musl_gcc_tarball(ctx):
+    """Packs musl, musl headers and Linux headers into `<name>.tar.xz` with lib/ and include/ roots."""
+    tarball = ctx.actions.declare_file(ctx.attr.name + ".tar.xz")
+
+    musl_headers = ctx.file.musl_headers
+    linux_headers = ctx.file.linux_headers
+    musl_root = detect_root(ctx.attr.musl)
+    musl_files = ctx.files.musl
+
+    # This builds a tarball containing musl, musl headers and linux headers.
+    # tar flags: -c create, -h follow symlinks, -J xz-compress, -f output file.
+    # Each input path is listed once as an archive member and once inside a
+    # --transform expression that rewrites its prefix to include/ or lib/.
+    # Positional parameters are double-quoted so that paths containing
+    # whitespace survive shell word splitting.
+
+    # TODO(q3k): write nice, small static Go utility for this.
+
+    arguments = [tarball.path]
+    command = "tar -chJf \"$1\""
+
+    arguments += [musl_headers.path]
+    command += " --transform 's|^'\"$2\"'|include|' \"$2\""
+
+    arguments += [linux_headers.path]
+    command += " --transform 's|^'\"$3\"'|include|' \"$3\""
+
+    arguments += [musl_root]
+    command += " --transform 's|^'\"$4\"'|lib|' \"$4\""
+
+    ctx.actions.run_shell(
+        inputs = [musl_headers, linux_headers] + musl_files,
+        outputs = [tarball],
+        progress_message = "Building toolchain tarball",
+        mnemonic = "BuildToolchainTarball",
+        arguments = arguments,
+        use_default_shell_env = True,
+        command = command,
+    )
+    return [DefaultInfo(files = depset([tarball]))]
+
+musl_gcc_tarball = rule(
+    implementation = _musl_gcc_tarball,
+    attrs = {
+        "musl": attr.label(mandatory = True),  # Its detected root is archived under lib/.
+        "musl_headers": attr.label(mandatory = True, allow_single_file = True),  # Archived under include/.
+        "linux_headers": attr.label(mandatory = True, allow_single_file = True),  # Archived under include/.
+    },
+)
diff --git a/build/toolchain/musl-host-gcc/sysroot_repository.bzl b/build/toolchain/musl-host-gcc/sysroot_repository.bzl
new file mode 100644
index 0000000..253abbf
--- /dev/null
+++ b/build/toolchain/musl-host-gcc/sysroot_repository.bzl
@@ -0,0 +1,42 @@
+#  Copyright 2020 The Monogon Project Authors.
+#
+#  SPDX-License-Identifier: Apache-2.0
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+"""
+A generic workspace rule that extracts some subpaths from a tarball.
+
+TODO(q3k): This should maybe be moved to //build/utils and called differently.
+"""
+
+def _musl_sysroot_rule_impl(rctx):
+    """Extracts the `snapshot` archive into the repository and writes a BUILD file exposing `:all`."""
+    rctx.extract(rctx.attr.snapshot)
+    rctx.file("BUILD.bazel", """
+filegroup(
+    name = "all",
+    srcs = glob(["include/**", "lib/**"]),
+    visibility = ["//visibility:public"],
+)
+""")
+
+musl_sysroot_rule = repository_rule(
+    implementation = _musl_sysroot_rule_impl,
+    attrs = {
+        "snapshot": attr.label(  # Archive handed to rctx.extract by the implementation.
+            default = Label("//build/toolchain/musl-host-gcc:sysroot.tar.xz"),
+            allow_single_file = True,
+        ),
+    },
+)