m/t/launch/cluster: add ShutdownNode/StartNode calls

This is a simplistic implementation of the ability to shut down and then
start nodes back up.

This has the following known issues:

 1. Starting a node back up won't start its TPM emulator again.
 2. LaunchNode and StartNode likely should be reworked into CreateNode
    and StartNode.

A future change will clean this up, but this is enough to be able to
implement cold cluster startup tests.

Change-Id: I2ed34a30c8659e5023866aaa8f4ff19caafb53fd
Reviewed-on: https://review.monogon.dev/c/monogon/+/2942
Tested-by: Jenkins CI
Reviewed-by: Tim Windelschmidt <tim@monogon.tech>
diff --git a/metropolis/test/launch/cluster/cluster.go b/metropolis/test/launch/cluster/cluster.go
index 436a27f..522b2f1 100644
--- a/metropolis/test/launch/cluster/cluster.go
+++ b/metropolis/test/launch/cluster/cluster.go
@@ -1117,6 +1117,52 @@
 	return nil
 }
 
+// ShutdownNode performs an ungraceful shutdown (i.e. power off) of the node
+// given by idx. If the node is already shut down, this is a no-op.
+func (c *Cluster) ShutdownNode(idx int) error {
+	if idx < 0 || idx >= len(c.NodeIDs) {
+		return fmt.Errorf("index out of bounds")
+	}
+	// Return if node is already stopped.
+	select {
+	case <-c.nodeOpts[idx].Runtime.ctxT.Done():
+		return nil
+	default:
+	}
+	id := c.NodeIDs[idx]
+
+	// Cancel the node's context. This will shut down QEMU.
+	c.nodeOpts[idx].Runtime.CtxC()
+	launch.Log("Cluster: waiting for node %d (%s) to stop.", idx, id)
+	err := <-c.nodesDone[idx]
+	if err != nil {
+		return fmt.Errorf("while shutting down node: %w", err)
+	}
+	return nil
+}
+
+// StartNode performs a power on of the node given by idx. If the node is already
+// running, this is a no-op.
+func (c *Cluster) StartNode(idx int) error {
+	if idx < 0 || idx >= len(c.NodeIDs) {
+		return fmt.Errorf("index out of bounds")
+	}
+	id := c.NodeIDs[idx]
+	// Return if node is already running.
+	select {
+	case <-c.nodeOpts[idx].Runtime.ctxT.Done():
+	default:
+		return nil
+	}
+
+	// Start QEMU again.
+	launch.Log("Cluster: starting node %d (%s).", idx, id)
+	if err := LaunchNode(c.ctxT, c.launchDir, c.socketDir, &c.nodeOpts[idx], c.nodesDone[idx]); err != nil {
+		return fmt.Errorf("failed to launch node %d: %w", idx, err)
+	}
+	return nil
+}
+
 // Close cancels the running clusters' context and waits for all virtualized
 // nodes to stop. It returns an error if stopping the nodes failed, or one of
 // the nodes failed to fully start in the first place.