m/t/launch/cluster: add ShutdownNode/StartNode calls
This is a simplistic implementation of the ability to shut down and then
start nodes back up.
This has the following known issues:
1. Starting a node back up won't start its TPM emulator again.
2. LaunchNode and StartNode likely should be reworked into CreateNode
and StartNode.
A future change will clean this up, but this is enough to be able to
implement cold cluster startup tests.
Change-Id: I2ed34a30c8659e5023866aaa8f4ff19caafb53fd
Reviewed-on: https://review.monogon.dev/c/monogon/+/2942
Tested-by: Jenkins CI
Reviewed-by: Tim Windelschmidt <tim@monogon.tech>
diff --git a/metropolis/test/launch/cluster/cluster.go b/metropolis/test/launch/cluster/cluster.go
index 436a27f..522b2f1 100644
--- a/metropolis/test/launch/cluster/cluster.go
+++ b/metropolis/test/launch/cluster/cluster.go
@@ -1117,6 +1117,52 @@
return nil
}
+// ShutdownNode performs an ungraceful shutdown (i.e. power off) of the node
+// given by idx. If the node is already shut down, this is a no-op.
+func (c *Cluster) ShutdownNode(idx int) error {
+	if idx < 0 || idx >= len(c.NodeIDs) {
+		return fmt.Errorf("index out of bounds")
+	}
+	// No-op if the node's context is already canceled, i.e. it is already stopped.
+	select {
+	case <-c.nodeOpts[idx].Runtime.ctxT.Done():
+		return nil
+	default:
+	}
+	id := c.NodeIDs[idx]
+
+	// Cancel the node's context. This will shut down QEMU, then block below until
+	c.nodeOpts[idx].Runtime.CtxC()
+	launch.Log("Cluster: waiting for node %d (%s) to stop.", idx, id)
+	err := <-c.nodesDone[idx]
+	if err != nil {
+		return fmt.Errorf("while shutting down node: %w", err)
+	}
+	return nil
+}
+
+// StartNode performs a power on of the node given by idx. If the node is already
+// running, this is a no-op.
+func (c *Cluster) StartNode(idx int) error {
+	if idx < 0 || idx >= len(c.NodeIDs) {
+		return fmt.Errorf("index out of bounds")
+	}
+	id := c.NodeIDs[idx]
+	// No-op if the node is still running; proceed only once its context is done.
+	select {
+	case <-c.nodeOpts[idx].Runtime.ctxT.Done():
+	default:
+		return nil
+	}
+
+	// Start QEMU again, reusing the node's existing options and done channel.
+	launch.Log("Cluster: starting node %d (%s).", idx, id)
+	if err := LaunchNode(c.ctxT, c.launchDir, c.socketDir, &c.nodeOpts[idx], c.nodesDone[idx]); err != nil {
+		return fmt.Errorf("failed to launch node %d: %w", idx, err)
+	}
+	return nil
+}
+
// Close cancels the running clusters' context and waits for all virtualized
// nodes to stop. It returns an error if stopping the nodes failed, or one of
// the nodes failed to fully start in the first place.