Revamp DHCP, add basic context management

This started off as a small change to make the network service DHCP client a bit nicer, and basically ended up being me half-assedly starting to add context management within Smalltown.

In my opinion, simple OnStart/OnStop lifecycle management for services will stop working once we have to start handling failing services. I think taking inspiration from Erlang's OTP and implementing some sort of supervision tree is the way to go. I think this also ties nicely together with Go's context system, at least partially. Implementing the full supervision tree system is out of scope for this change, but at least this introduces .Context() on the base service struct that service implementations can use. Currently each service has its own background context, but again, this should tie into some sort of supervision tree in the future. There will be a design document for this.

I also rejigger the init code to have a context available immediately, and use that to acquire (with timeout) information about DHCP addresses from the network service.

I also fix a bug where the network service is started twice (once by init, once by the smalltown node code; now the smalltown node code takes in a dependency-injected network service instead).

I also fix a bug where OnStop would call OnStart. Whoops.

Test Plan: no new functionality, covered by current tests

Bug: T561

X-Origin-Diff: phab/D396
GitOrigin-RevId: adddf3dd2f140b6ea64eb034ff19533d32c4ef23
diff --git a/core/internal/node/main.go b/core/internal/node/main.go
index b1d74d6..b0674d2 100644
--- a/core/internal/node/main.go
+++ b/core/internal/node/main.go
@@ -18,6 +18,7 @@
 
 import (
 	"bytes"
+	"context"
 	"crypto/ed25519"
 	"crypto/rand"
 	"crypto/sha512"
@@ -31,9 +32,8 @@
 	"io/ioutil"
 	"math/big"
 	"net"
-	"time"
-
 	"os"
+	"time"
 
 	apipb "git.monogon.dev/source/nexantic.git/core/generated/api"
 	"git.monogon.dev/source/nexantic.git/core/internal/api"
@@ -43,12 +43,12 @@
 	"git.monogon.dev/source/nexantic.git/core/internal/kubernetes"
 	"git.monogon.dev/source/nexantic.git/core/internal/network"
 	"git.monogon.dev/source/nexantic.git/core/internal/storage"
-	"github.com/cenkalti/backoff/v4"
-	"google.golang.org/grpc"
-	"google.golang.org/grpc/credentials"
 
+	"github.com/cenkalti/backoff/v4"
 	"github.com/gogo/protobuf/proto"
 	"go.uber.org/zap"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials"
 )
 
 var (
@@ -71,32 +71,22 @@
 	}
 )
 
-func NewSmalltownNode(logger *zap.Logger) (*SmalltownNode, error) {
+func NewSmalltownNode(logger *zap.Logger, ntwk *network.Service, strg *storage.Manager) (*SmalltownNode, error) {
 	flag.Parse()
 	logger.Info("Creating Smalltown node")
+	ctx := context.Background()
 
 	hostname, err := os.Hostname()
 	if err != nil {
 		panic(err)
 	}
 
-	networkService, err := network.NewNetworkService(network.Config{}, logger.With(zap.String("component", "network")))
+	// Wait for IP address...
+	ctxT, ctxTC := context.WithTimeout(ctx, time.Second*10)
+	defer ctxTC()
+	externalIP, err := ntwk.GetIP(ctxT, true)
 	if err != nil {
-		panic(err)
-	}
-
-	if err := networkService.Start(); err != nil {
-		logger.Panic("Failed to start network service", zap.Error(err))
-	}
-
-	storageManager, err := storage.Initialize(logger.With(zap.String("component", "storage")))
-	if err != nil {
-		logger.Error("Failed to initialize storage manager", zap.Error(err))
-		return nil, err
-	}
-	externalIP := networkService.GetIP(true)
-	if externalIP == nil {
-		logger.Panic("Waited for IP but didn't get one")
+		logger.Panic("Could not get IP address", zap.Error(err))
 	}
 
 	// Important to know if the GetIP above hangs
@@ -113,8 +103,8 @@
 
 	s := &SmalltownNode{
 		Consensus: consensusService,
-		Storage:   storageManager,
-		Network:   networkService,
+		Storage:   strg,
+		Network:   ntwk,
 		logger:    logger,
 		hostname:  hostname,
 	}
@@ -133,7 +123,7 @@
 	return s, nil
 }
 
-func (s *SmalltownNode) Start() error {
+func (s *SmalltownNode) Start(ctx context.Context) error {
 	s.logger.Info("Starting Smalltown node")
 
 	// TODO(lorenz): Abstracting enrolment sounds like a good idea, but ends up being painful
@@ -156,22 +146,22 @@
 		if len(enrolmentConfig.EnrolmentSecret) == 0 {
 			return s.startFull()
 		}
-		return s.startEnrolling()
+		return s.startEnrolling(ctx)
 	} else if os.IsNotExist(err) {
 		// This is ok like this, once a new cluster has been set up the initial node also generates
 		// its own enrolment config
-		return s.startForSetup()
+		return s.startForSetup(ctx)
 	}
 	// Unknown error reading enrolment config (disk issues/invalid configuration format/...)
 	s.logger.Panic("Invalid enrolment configuration provided", zap.Error(err))
 	panic("Unreachable")
 }
 
-func (s *SmalltownNode) startEnrolling() error {
+func (s *SmalltownNode) startEnrolling(ctx context.Context) error {
 	s.logger.Info("Initializing subsystems for enrolment")
 	s.state = common.StateEnrollMode
 
-	nodeInfo, nodeID, err := s.InitializeNode()
+	nodeInfo, nodeID, err := s.InitializeNode(ctx)
 	if err != nil {
 		return err
 	}
@@ -211,9 +201,9 @@
 	return nil
 }
 
-func (s *SmalltownNode) startForSetup() error {
+func (s *SmalltownNode) startForSetup(ctx context.Context) error {
 	s.logger.Info("Setting up a new cluster")
-	initData, nodeID, err := s.InitializeNode()
+	initData, nodeID, err := s.InitializeNode(ctx)
 	if err != nil {
 		return err
 	}
@@ -276,10 +266,14 @@
 		return err
 	}
 
+	ip, err := s.Network.GetIP(ctx, true)
+	if err != nil {
+		return fmt.Errorf("could not get node IP: %v", err)
+	}
 	enrolmentConfig := &apipb.EnrolmentConfig{
 		EnrolmentSecret: []byte{}, // First node is always already enrolled
 		MastersCert:     masterCert,
-		MasterIps:       [][]byte{[]byte(*s.Network.GetIP(true))},
+		MasterIps:       [][]byte{[]byte(*ip)},
 		NodeId:          nodeID,
 	}
 	enrolmentConfigRaw, err := proto.Marshal(enrolmentConfig)