cloud/shepherd/manager: fix data race on dummyProvider.machines in tests
Change-Id: I74dfe7ebd274a2014df827ae804d02cded3af090
Reviewed-on: https://review.monogon.dev/c/monogon/+/3689
Tested-by: Jenkins CI
Reviewed-by: Lorenz Brun <lorenz@monogon.tech>
diff --git a/cloud/shepherd/manager/initializer_test.go b/cloud/shepherd/manager/initializer_test.go
index 5ba2253..9545b5f 100644
--- a/cloud/shepherd/manager/initializer_test.go
+++ b/cloud/shepherd/manager/initializer_test.go
@@ -83,6 +83,8 @@
}
}
+ provider.muMachines.RLock()
+ defer provider.muMachines.RUnlock()
for _, m := range provider.machines {
if !m.agentStarted {
t.Fatalf("Initializer didn't start agent on machine %q", m.id)
diff --git a/cloud/shepherd/manager/provider_test.go b/cloud/shepherd/manager/provider_test.go
index 4cdfb18..c88c996 100644
--- a/cloud/shepherd/manager/provider_test.go
+++ b/cloud/shepherd/manager/provider_test.go
@@ -4,6 +4,7 @@
"context"
"fmt"
"net/netip"
+ "sync"
"time"
"github.com/google/uuid"
@@ -70,7 +71,9 @@
return nil, err
}
+ dsc.dp.muMachines.RLock()
m := dsc.dp.machines[shepherd.ProviderID(uid.String())]
+ dsc.dp.muMachines.RUnlock()
if m == nil {
return nil, fmt.Errorf("failed finding machine in map")
}
@@ -95,12 +98,16 @@
type dummyProvider struct {
capacity int
machines map[shepherd.ProviderID]*dummyMachine
+ muMachines sync.RWMutex
}
func (dp *dummyProvider) createDummyMachines(ctx context.Context, session *bmdb.Session, count int) ([]shepherd.Machine, error) {
+ dp.muMachines.RLock()
if len(dp.machines)+count > dp.capacity {
+ dp.muMachines.RUnlock()
return nil, fmt.Errorf("no capacity left")
}
+ dp.muMachines.RUnlock()
var machines []shepherd.Machine
for i := 0; i < count; i++ {
@@ -123,9 +130,11 @@
func (dp *dummyProvider) ListMachines(ctx context.Context) ([]shepherd.Machine, error) {
var machines []shepherd.Machine
+ dp.muMachines.RLock()
for _, m := range dp.machines {
machines = append(machines, m)
}
+ dp.muMachines.RUnlock()
unusedMachineCount := dp.capacity - len(machines)
for i := 0; i < unusedMachineCount; i++ {
@@ -141,6 +150,8 @@
}
func (dp *dummyProvider) GetMachine(ctx context.Context, id shepherd.ProviderID) (shepherd.Machine, error) {
+ dp.muMachines.RLock()
+ defer dp.muMachines.RUnlock()
for _, m := range dp.machines {
if m.ID() == id {
return m, nil
@@ -177,7 +188,9 @@
}
dm.availability = shepherd.AvailabilityKnownUsed
+ dp.muMachines.Lock()
dp.machines[dm.id] = dm
+ dp.muMachines.Unlock()
return dm, nil
}
diff --git a/cloud/shepherd/manager/provisioner_test.go b/cloud/shepherd/manager/provisioner_test.go
index 079bf96..aea7717 100644
--- a/cloud/shepherd/manager/provisioner_test.go
+++ b/cloud/shepherd/manager/provisioner_test.go
@@ -64,19 +64,21 @@
return err
})
if err != nil {
- t.Errorf("Transact failed: %v", err)
+ t.Fatalf("Transact failed: %v", err)
}
if len(provided) < 10 {
continue
}
if len(provided) > 10 {
- t.Errorf("%d machines provided (limit: 10)", len(provided))
+ t.Fatalf("%d machines provided (limit: 10)", len(provided))
}
for _, mp := range provided {
+ provider.muMachines.RLock()
if provider.machines[shepherd.ProviderID(mp.ProviderID)] == nil {
- t.Errorf("BMDB machine %q has unknown provider ID %q", mp.MachineID, mp.ProviderID)
+ t.Fatalf("BMDB machine %q has unknown provider ID %q", mp.MachineID, mp.ProviderID)
}
+ provider.muMachines.RUnlock()
}
return
@@ -131,7 +133,7 @@
t.Run(tt.name, func(t *testing.T) {
p := &Provisioner{}
if got := p.resolvePossiblyUsed(&dummyMachine{id: tt.machineID, availability: tt.machineAvailability}, providedMachines); got != tt.wantedAvailability {
- t.Errorf("resolvePossiblyUsed() = %v, want %v", got, tt.wantedAvailability)
+ t.Fatalf("resolvePossiblyUsed() = %v, want %v", got, tt.wantedAvailability)
}
})
}