m/t/launch/cluster: add ShutdownNode/StartNode calls

This is a simplistic implementation of the ability to shut down and then
start nodes back up.

This has the following known issues:

 1. Starting a node back up won't start its TPM emulator again.
 2. LaunchNode and StartNode likely should be reworked into CreateNode
    and StartNode.

A future change will clean this up, but this is enough to be able to
implement cold cluster startup tests.

Change-Id: I2ed34a30c8659e5023866aaa8f4ff19caafb53fd
Reviewed-on: https://review.monogon.dev/c/monogon/+/2942
Tested-by: Jenkins CI
Reviewed-by: Tim Windelschmidt <tim@monogon.tech>
diff --git a/metropolis/test/launch/cluster/cluster.go b/metropolis/test/launch/cluster/cluster.go
index 436a27f..522b2f1 100644
--- a/metropolis/test/launch/cluster/cluster.go
+++ b/metropolis/test/launch/cluster/cluster.go
@@ -1117,6 +1117,52 @@
 	return nil
 }
 
+// ShutdownNode performs an ungraceful shutdown (i.e. power off) of the node
+// given by idx. If the node is already shut down, this is a no-op.
+func (c *Cluster) ShutdownNode(idx int) error {
+	if idx < 0 || idx >= len(c.NodeIDs) {
+		return fmt.Errorf("index out of bounds")
+	}
+	// Return if node is already stopped.
+	select {
+	case <-c.nodeOpts[idx].Runtime.ctxT.Done():
+		return nil
+	default:
+	}
+	id := c.NodeIDs[idx]
+
+	// Cancel the node's context. This will shut down QEMU.
+	c.nodeOpts[idx].Runtime.CtxC()
+	launch.Log("Cluster: waiting for node %d (%s) to stop.", idx, id)
+	err := <-c.nodesDone[idx]
+	if err != nil {
+		return fmt.Errorf("while shutting down node: %w", err)
+	}
+	return nil
+}
+
+// StartNode performs a power on of the node given by idx. If the node is already
+// running, this is a no-op.
+func (c *Cluster) StartNode(idx int) error {
+	if idx < 0 || idx >= len(c.NodeIDs) {
+		return fmt.Errorf("index out of bounds")
+	}
+	id := c.NodeIDs[idx]
+	// Return if node is already running.
+	select {
+	case <-c.nodeOpts[idx].Runtime.ctxT.Done():
+	default:
+		return nil
+	}
+
+	// Start QEMU again.
+	launch.Log("Cluster: starting node %d (%s).", idx, id)
+	if err := LaunchNode(c.ctxT, c.launchDir, c.socketDir, &c.nodeOpts[idx], c.nodesDone[idx]); err != nil {
+		return fmt.Errorf("failed to launch node %d: %w", idx, err)
+	}
+	return nil
+}
+
 // Close cancels the running clusters' context and waits for all virtualized
 // nodes to stop. It returns an error if stopping the nodes failed, or one of
 // the nodes failed to fully start in the first place.