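cloud/shepherd/manager: fix data race

The dummyProvider.machines map used by the tests was accessed without
synchronization. Guard it with a new sync.RWMutex (muMachines) and take
the lock around the accesses in the initializer, provider and provisioner
tests. The provisioner test assertions are also changed from t.Errorf to
t.Fatalf.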

Change-Id: I74dfe7ebd274a2014df827ae804d02cded3af090
Reviewed-on: https://review.monogon.dev/c/monogon/+/3689
Tested-by: Jenkins CI
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/cloud/shepherd/manager/initializer_test.go b/cloud/shepherd/manager/initializer_test.go
index 5ba2253..9545b5f 100644
--- a/cloud/shepherd/manager/initializer_test.go
+++ b/cloud/shepherd/manager/initializer_test.go
@@ -83,6 +83,8 @@
 		}
 	}
 
+	provider.muMachines.RLock()
+	defer provider.muMachines.RUnlock()
 	for _, m := range provider.machines {
 		if !m.agentStarted {
 			t.Fatalf("Initializer didn't start agent on machine %q", m.id)
diff --git a/cloud/shepherd/manager/provider_test.go b/cloud/shepherd/manager/provider_test.go
index 4cdfb18..c88c996 100644
--- a/cloud/shepherd/manager/provider_test.go
+++ b/cloud/shepherd/manager/provider_test.go
@@ -4,6 +4,7 @@
 	"context"
 	"fmt"
 	"net/netip"
+	"sync"
 	"time"
 
 	"github.com/google/uuid"
@@ -70,7 +71,9 @@
 		return nil, err
 	}
 
+	dsc.dp.muMachines.RLock()
 	m := dsc.dp.machines[shepherd.ProviderID(uid.String())]
+	dsc.dp.muMachines.RUnlock()
 	if m == nil {
 		return nil, fmt.Errorf("failed finding machine in map")
 	}
@@ -95,12 +98,16 @@
 type dummyProvider struct {
 	capacity int
 	machines map[shepherd.ProviderID]*dummyMachine
+	muMachines sync.RWMutex // guards machines
 }
 
 func (dp *dummyProvider) createDummyMachines(ctx context.Context, session *bmdb.Session, count int) ([]shepherd.Machine, error) {
+	dp.muMachines.RLock()
 	if len(dp.machines)+count > dp.capacity {
+		dp.muMachines.RUnlock()
 		return nil, fmt.Errorf("no capacity left")
 	}
+	dp.muMachines.RUnlock()
 
 	var machines []shepherd.Machine
 	for i := 0; i < count; i++ {
@@ -123,9 +130,11 @@
 
 func (dp *dummyProvider) ListMachines(ctx context.Context) ([]shepherd.Machine, error) {
 	var machines []shepherd.Machine
+	dp.muMachines.RLock()
 	for _, m := range dp.machines {
 		machines = append(machines, m)
 	}
+	dp.muMachines.RUnlock()
 
 	unusedMachineCount := dp.capacity - len(machines)
 	for i := 0; i < unusedMachineCount; i++ {
@@ -141,6 +150,8 @@
 }
 
 func (dp *dummyProvider) GetMachine(ctx context.Context, id shepherd.ProviderID) (shepherd.Machine, error) {
+	dp.muMachines.RLock()
+	defer dp.muMachines.RUnlock()
 	for _, m := range dp.machines {
 		if m.ID() == id {
 			return m, nil
@@ -177,7 +188,9 @@
 	}
 
 	dm.availability = shepherd.AvailabilityKnownUsed
+	dp.muMachines.Lock()
 	dp.machines[dm.id] = dm
+	dp.muMachines.Unlock()
 
 	return dm, nil
 }
diff --git a/cloud/shepherd/manager/provisioner_test.go b/cloud/shepherd/manager/provisioner_test.go
index 079bf96..aea7717 100644
--- a/cloud/shepherd/manager/provisioner_test.go
+++ b/cloud/shepherd/manager/provisioner_test.go
@@ -64,19 +64,21 @@
 			return err
 		})
 		if err != nil {
-			t.Errorf("Transact failed: %v", err)
+			t.Fatalf("Transact failed: %v", err)
 		}
 		if len(provided) < 10 {
 			continue
 		}
 		if len(provided) > 10 {
-			t.Errorf("%d machines provided (limit: 10)", len(provided))
+			t.Fatalf("%d machines provided (limit: 10)", len(provided))
 		}
 
 		for _, mp := range provided {
+			provider.muMachines.RLock()
 			if provider.machines[shepherd.ProviderID(mp.ProviderID)] == nil {
-				t.Errorf("BMDB machine %q has unknown provider ID %q", mp.MachineID, mp.ProviderID)
+				t.Fatalf("BMDB machine %q has unknown provider ID %q", mp.MachineID, mp.ProviderID)
 			}
+			provider.muMachines.RUnlock()
 		}
 
 		return
@@ -131,7 +133,7 @@
 		t.Run(tt.name, func(t *testing.T) {
 			p := &Provisioner{}
 			if got := p.resolvePossiblyUsed(&dummyMachine{id: tt.machineID, availability: tt.machineAvailability}, providedMachines); got != tt.wantedAvailability {
-				t.Errorf("resolvePossiblyUsed() = %v, want %v", got, tt.wantedAvailability)
+				t.Fatalf("resolvePossiblyUsed() = %v, want %v", got, tt.wantedAvailability)
 			}
 		})
 	}