metropolis/test: create swtpm TPMs at runtime instead of compile time
The generated TPM data is random (it contains generated cryptographic
keys) so we really shouldn't be building it with Bazel.
Instead, let's create it at runtime for e2e tests, and also actually
generate separate TPM data per node with a common issuer for all.
Moving the logic out of //metropolis/node also feels deserved, as this
is all squarely in test territory.
Change-Id: I257ee54c88ede685ba3faf573282b0f9228b10e8
Reviewed-on: https://review.monogon.dev/c/monogon/+/3132
Tested-by: Jenkins CI
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/metropolis/test/launch/cluster/BUILD.bazel b/metropolis/test/launch/cluster/BUILD.bazel
index 89f7a7a..efa04cb 100644
--- a/metropolis/test/launch/cluster/BUILD.bazel
+++ b/metropolis/test/launch/cluster/BUILD.bazel
@@ -7,15 +7,19 @@
"insecure_key.go",
"metroctl.go",
"prefixed_stdio.go",
+ "swtpm.go",
],
data = [
"//metropolis/node:image",
- "//metropolis/node:swtpm_data",
"//metropolis/test/ktest:linux-testing",
"//metropolis/test/nanoswitch:initramfs",
+ "//metropolis/test/swtpm/certtool",
+ "//metropolis/test/swtpm/swtpm_cert",
"//third_party/edk2:firmware",
"@com_github_bonzini_qboot//:qboot-bin",
"@swtpm",
+ "@swtpm//:swtpm_localca",
+ "@swtpm//:swtpm_setup",
],
importpath = "source.monogon.dev/metropolis/test/launch/cluster",
visibility = ["//visibility:public"],
diff --git a/metropolis/test/launch/cluster/cluster.go b/metropolis/test/launch/cluster/cluster.go
index 4c678a1..e2f2925 100644
--- a/metropolis/test/launch/cluster/cluster.go
+++ b/metropolis/test/launch/cluster/cluster.go
@@ -160,22 +160,6 @@
return nil, fmt.Errorf("while copying firmware variables: %w", err)
}
- // Create the TPM state directory and initialize all files required by swtpm.
- tpmt := filepath.Join(stdp, "tpm")
- if err := os.Mkdir(tpmt, 0o755); err != nil {
- return nil, fmt.Errorf("while creating the TPM directory: %w", err)
- }
- for _, name := range []string{"issuercert.pem", "signkey.pem", "tpm2-00.permall"} {
- src, err := runfiles.Rlocation(filepath.Join("_main/metropolis/node/tpm", name))
- if err != nil {
- return nil, fmt.Errorf("while resolving a path: %w", err)
- }
- tgt := filepath.Join(tpmt, name)
- if err := copyFile(src, tgt); err != nil {
- return nil, fmt.Errorf("while copying TPM state: file %q to %q: %w", src, tgt, err)
- }
- }
-
// Create the socket directory.
sotdp, err := os.MkdirTemp(sd, "node_sock*")
if err != nil {
@@ -225,7 +209,7 @@
// (swtpm <-> QEMU interplay) respectively. The directories must exist before
// LaunchNode is called. LaunchNode will update options.Runtime and options.Mac
// if either are not initialized.
-func LaunchNode(ctx context.Context, ld, sd string, options *NodeOptions, doneC chan error) error {
+func LaunchNode(ctx context.Context, ld, sd string, tpmFactory *TPMFactory, options *NodeOptions, doneC chan error) error {
// TODO(mateusz@monogon.tech) try using QEMU's abstract socket namespace instead
// of /tmp (requires QEMU version >5.0).
// https://github.com/qemu/qemu/commit/776b97d3605ed0fc94443048fdf988c7725e38a9).
@@ -325,6 +309,17 @@
qemuArgs = append(qemuArgs, "-object", qemuNetDump.ToOption("filter-dump"))
}
+ // Manufacture TPM if needed.
+ tpmd := filepath.Join(r.ld, "tpm")
+ err = tpmFactory.Manufacture(ctx, tpmd, &TPMPlatform{
+ Manufacturer: "Monogon",
+ Version: "1.0",
+ Model: "TestCluster",
+ })
+ if err != nil {
+ return fmt.Errorf("could not manufacture TPM: %w", err)
+ }
+
// Start TPM emulator as a subprocess
swtpm, err := runfiles.Rlocation("swtpm/swtpm")
if err != nil {
@@ -333,7 +328,6 @@
tpmCtx, tpmCancel := context.WithCancel(options.Runtime.ctxT)
- tpmd := filepath.Join(r.ld, "tpm")
tpmEmuCmd := exec.CommandContext(tpmCtx, swtpm, "socket", "--tpm2", "--tpmstate", "dir="+tpmd, "--ctrl", "type=unixio,path="+tpmSocketPath)
// Silence warnings from unsafe libtpms build (uses non-constant-time
// cryptographic operations).
@@ -612,6 +606,8 @@
// ctxC is used by Close to cancel the context under which the nodes are
// running.
ctxC context.CancelFunc
+
+ tpmFactory *TPMFactory
}
// NodeInCluster represents information about a node that's part of a Cluster.
@@ -739,6 +735,12 @@
return nil, fmt.Errorf("failed to create the socket directory: %w", err)
}
+ // Set up TPM factory.
+ tpmf, err := NewTPMFactory(filepath.Join(ld, "tpm"))
+ if err != nil {
+ return nil, fmt.Errorf("failed to create TPM factory: %w", err)
+ }
+
// Prepare links between nodes and nanoswitch.
var switchPorts []*os.File
var vmPorts []*os.File
@@ -791,7 +793,7 @@
// Start the first node.
ctxT, ctxC := context.WithCancel(ctx)
launch.Log("Cluster: Starting node %d...", 1)
- if err := LaunchNode(ctxT, ld, sd, &nodeOpts[0], done[0]); err != nil {
+ if err := LaunchNode(ctxT, ld, sd, tpmf, &nodeOpts[0], done[0]); err != nil {
ctxC()
return nil, fmt.Errorf("failed to launch first node: %w", err)
}
@@ -911,6 +913,8 @@
ctxT: ctxT,
ctxC: ctxC,
+
+ tpmFactory: tpmf,
}
// Now start the rest of the nodes and register them into the cluster.
@@ -977,7 +981,7 @@
// Now run the rest of the nodes.
for i := 1; i < opts.NumNodes; i++ {
launch.Log("Cluster: Starting node %d...", i+1)
- err := LaunchNode(ctxT, ld, sd, &nodeOpts[i], done[i])
+ err := LaunchNode(ctxT, ld, sd, tpmf, &nodeOpts[i], done[i])
if err != nil {
return nil, fmt.Errorf("failed to launch node %d: %w", i+1, err)
}
@@ -1099,7 +1103,7 @@
// Start QEMU again.
launch.Log("Cluster: restarting node %d (%s).", idx, id)
- if err := LaunchNode(c.ctxT, c.launchDir, c.socketDir, &c.nodeOpts[idx], c.nodesDone[idx]); err != nil {
+ if err := LaunchNode(c.ctxT, c.launchDir, c.socketDir, c.tpmFactory, &c.nodeOpts[idx], c.nodesDone[idx]); err != nil {
return fmt.Errorf("failed to launch node %d: %w", idx, err)
}
@@ -1145,6 +1149,7 @@
if err != nil {
return fmt.Errorf("while shutting down node: %w", err)
}
+ launch.Log("Cluster: node %d (%s) stopped.", idx, id)
return nil
}
@@ -1164,9 +1169,10 @@
// Start QEMU again.
launch.Log("Cluster: starting node %d (%s).", idx, id)
- if err := LaunchNode(c.ctxT, c.launchDir, c.socketDir, &c.nodeOpts[idx], c.nodesDone[idx]); err != nil {
+ if err := LaunchNode(c.ctxT, c.launchDir, c.socketDir, c.tpmFactory, &c.nodeOpts[idx], c.nodesDone[idx]); err != nil {
return fmt.Errorf("failed to launch node %d: %w", idx, err)
}
+ launch.Log("Cluster: node %d (%s) started.", idx, id)
return nil
}
diff --git a/metropolis/test/launch/cluster/swtpm.go b/metropolis/test/launch/cluster/swtpm.go
new file mode 100644
index 0000000..0f9b5c5
--- /dev/null
+++ b/metropolis/test/launch/cluster/swtpm.go
@@ -0,0 +1,186 @@
+package cluster
+
+import (
+ "context"
+ "fmt"
+ "log"
+ "os"
+ "os/exec"
+ "path/filepath"
+ "sort"
+ "strings"
+
+ "github.com/bazelbuild/rules_go/go/runfiles"
+
+ "source.monogon.dev/metropolis/test/launch"
+)
+
+// A TPMFactory manufactures virtual TPMs using swtpm.
+//
+// A factory owns a state directory holding per-factory data: CA keys and
+// certificates under <stateDir>/ca, and the swtpm_setup config at
+// <stateDir>/swtpm.conf. Each manufactured TPM additionally has its own state
+// directory, generated on manufacturing and then passed to an swtpm instance.
+type TPMFactory struct {
+ stateDir string
+}
+
+// NewTPMFactory creates a new TPM factory at a given state path. The state path
+// is a directory used to persist TPM factory data. It will be created if needed,
+// and can be reused across TPM factories (but not used in parallel).
+func NewTPMFactory(stateDir string) (*TPMFactory, error) {
+ // Directories need the search (x) bit alongside read to be usable, hence 0755
+ // rather than 0744.
+ if err := os.MkdirAll(stateDir, 0755); err != nil {
+ return nil, fmt.Errorf("could not create state directory: %w", err)
+ }
+ f := &TPMFactory{stateDir: stateDir}
+
+ // The CA directory is where the signing key is configured to live: owner only.
+ if err := os.MkdirAll(f.caDir(), 0700); err != nil {
+ return nil, fmt.Errorf("could not create CA state directory: %w", err)
+ }
+ err := writeSWTPMConfig(f.localCAConfPath(), map[string]string{
+ "statedir": f.caDir(),
+ "signingkey": filepath.Join(f.caDir(), "signkey.pem"),
+ "issuercert": filepath.Join(f.caDir(), "issuercert.pem"),
+ "certserial": filepath.Join(f.caDir(), "certserial"),
+ })
+ if err != nil {
+ return nil, fmt.Errorf("could not write swtpm-localca.conf: %w", err)
+ }
+ return f, nil
+}
+
+func (f *TPMFactory) caDir() string {
+ return filepath.Join(f.stateDir, "ca")
+}
+
+func (f *TPMFactory) localCAConfPath() string {
+ return filepath.Join(f.caDir(), "swtpm-localca.conf")
+}
+
+func (f *TPMFactory) localCAOptionsPath() string {
+ return filepath.Join(f.caDir(), "swtpm-localca.options")
+}
+
+func (f *TPMFactory) swtpmConfPath() string {
+ return filepath.Join(f.stateDir, "swtpm.conf")
+}
+
+// writeSWTPMConfig serializes a key/value config file for swtpm tools into a
+// path. Entries are written one per line as "key = value", sorted by key so
+// that the output is deterministic. An existing file at path is truncated and
+// overwritten.
+func writeSWTPMConfig(path string, data map[string]string) error {
+ keys := make([]string, 0, len(data))
+ for k := range data {
+ keys = append(keys, k)
+ }
+ sort.Strings(keys)
+
+ // Render in memory first, then write in one go: os.WriteFile also reports
+ // errors from closing the file, which a deferred Close would silently drop.
+ var b strings.Builder
+ for _, k := range keys {
+ fmt.Fprintf(&b, "%s = %s\n", k, data[k])
+ }
+
+ // 0666 (before umask) matches the mode os.Create would have used.
+ return os.WriteFile(path, []byte(b.String()), 0666)
+}
+
+// A TPMPlatform defines a platform that a TPM is part of, in this case a virtual
+// device. Its fields end up as --platform-* options in swtpm-localca.options.
+type TPMPlatform struct {
+ Manufacturer string
+ Version string
+ Model string
+}
+
+// Manufacture builds a new TPM for a given platform at a path. The path points
+// to a directory that will be created if it doesn't exist yet, and can be passed
+// to swtpm to actually emulate the created TPM. If the directory already holds
+// TPM state (tpm2-00.permall), Manufacture does nothing and returns nil.
+func (f *TPMFactory) Manufacture(ctx context.Context, path string, platform *TPMPlatform) error {
+ launch.Log("Starting to manufacture TPM for %s... (%+v)", path, platform)
+
+ // Path to state file. Used to make sure Manufacture runs only once.
+ permall := filepath.Join(path, "tpm2-00.permall")
+
+ if _, err := os.Stat(permall); err == nil {
+ launch.Log("Skipping manufacturing TPM for %s, already exists", path)
+ return nil
+ }
+
+ // Find all tools.
+ swtpm, err := runfiles.Rlocation("swtpm/swtpm")
+ if err != nil {
+ return fmt.Errorf("could not find swtpm: %w", err)
+ }
+ swtpmSetup, err := runfiles.Rlocation("swtpm/swtpm_setup")
+ if err != nil {
+ return fmt.Errorf("could not find swtpm_setup: %w", err)
+ }
+ swtpmLocalca, err := runfiles.Rlocation("swtpm/swtpm_localca")
+ if err != nil {
+ return fmt.Errorf("could not find swtpm_localca: %w", err)
+ }
+ swtpmCert, err := runfiles.Rlocation("_main/metropolis/test/swtpm/swtpm_cert/swtpm_cert_/swtpm_cert")
+ if err != nil {
+ return fmt.Errorf("could not find swtpm_cert: %w", err)
+ }
+ certtool, err := runfiles.Rlocation("_main/metropolis/test/swtpm/certtool/certtool_/certtool")
+ if err != nil {
+ return fmt.Errorf("could not find certtool: %w", err)
+ }
+
+ // Prepare swtpm-localca.options.
+ options := []string{
+ "--platform-manufacturer " + platform.Manufacturer,
+ "--platform-version " + platform.Version,
+ "--platform-model " + platform.Model,
+ "",
+ }
+ if err := os.WriteFile(f.localCAOptionsPath(), []byte(strings.Join(options, "\n")), 0600); err != nil {
+ return fmt.Errorf("could not write local options: %w", err)
+ }
+
+ // Prepare swtpm.conf.
+ err = writeSWTPMConfig(f.swtpmConfPath(), map[string]string{
+ "create_certs_tool": swtpmLocalca,
+ "create_certs_tool_config": f.localCAConfPath(),
+ "create_certs_tool_options": f.localCAOptionsPath(),
+ })
+ if err != nil {
+ return fmt.Errorf("could not write swtpm.conf: %w", err)
+ }
+
+ if err := os.MkdirAll(path, 0700); err != nil {
+ return fmt.Errorf("could not make output path: %w", err)
+ }
+ cmd := exec.CommandContext(ctx, swtpmSetup,
+ "--tpm", fmt.Sprintf("%s socket", swtpm),
+ "--tpmstate", path,
+ "--create-ek-cert",
+ "--create-platform-cert",
+ "--allow-signing",
+ "--tpm2",
+ "--display",
+ "--pcr-banks", "sha1,sha256,sha384,sha512",
+ "--config", f.swtpmConfPath())
+ // Run with a minimal PATH containing only the swtpm_cert and certtool dirs.
+ cmd.Env = append(cmd.Env, fmt.Sprintf("PATH=%s:%s", filepath.Dir(swtpmCert), filepath.Dir(certtool)))
+ cmd.Env = append(cmd.Env, "MONOGON_LIBTPMS_ACKNOWLEDGE_UNSAFE=yes")
+ if out, err := cmd.CombinedOutput(); err != nil {
+ log.Printf("Manufacturing TPM for %s failed: swtpm_setup: %s", path, out)
+ return fmt.Errorf("swtpm_setup failed: %w", err)
+ }
+
+ if _, err := os.Stat(permall); err != nil {
+ log.Printf("Manufacturing TPM for %s failed: state file did not get created", path)
+ return fmt.Errorf("%s did not get created during TPM manufacture: %w", permall, err)
+ }
+ launch.Log("Successfully manufactured TPM for %s", path)
+ return nil
+}