cloud/bmaas/scruffy/hw_stats.go - monogon - Gitiles

 package scruffy

 import (
 	"context"
 	"errors"
 	"fmt"
 	"time"

 	"github.com/google/uuid"
 	"github.com/prometheus/client_golang/prometheus"
 	"google.golang.org/protobuf/proto"
 	"k8s.io/klog/v2"

 	"source.monogon.dev/cloud/bmaas/bmdb/model"
 	"source.monogon.dev/cloud/bmaas/server/api"
 )

 // hwStatsRunner collects metrics from the machine hardware inventory in BMDB and
 // exposes them as Prometheus metrics via a registry passed to newHWStatsRunner.
 type hwStatsRunner struct {
 	s *Server

 	memoryPerRegion     *prometheus.GaugeVec
 	cpuThreadsPerRegion *prometheus.GaugeVec
 }

 // newHWStatsRunner builds a hwStatsRunner. The hwStatsRunner then has the
 // given's Server BMDB connection bound to it and can perform actual database
 // statistic gathering.
 func newHWStatsRunner(s *Server, reg *prometheus.Registry) *hwStatsRunner {
 	hwsr := &hwStatsRunner{
 		s: s,

 		memoryPerRegion: prometheus.NewGaugeVec(prometheus.GaugeOpts{
 			Name: "bmdb_hwstats_region_ram_bytes",
 		}, []string{"provider", "location"}),

 		cpuThreadsPerRegion: prometheus.NewGaugeVec(prometheus.GaugeOpts{
 			Name: "bmdb_hwstats_region_cpu_threads",
 		}, []string{"provider", "location"}),
 	}
 	reg.MustRegister(hwsr.memoryPerRegion, hwsr.cpuThreadsPerRegion)
 	return hwsr
 }

 func (h *hwStatsRunner) run(ctx context.Context) {
 	klog.Infof("Starting stats runner...")

 	ti := time.NewTicker(time.Minute)

 	for {
 		err := h.runOnce(ctx)
 		if err != nil {
 			if errors.Is(err, ctx.Err()) {
 				return
 			}
 			klog.Errorf("Stats run failed: %v", err)
 		}
 		select {
 		case <-ti.C:
 		case <-ctx.Done():
 			klog.Infof("Exiting stats runner (%v)...", ctx.Err())
 			return
 		}
 	}
 }

 // statsPerRegion are gathered and aggregated (summed) per region.
 type statsPerRegion struct {
 	ramBytes   uint64
 	numThreads uint64
 }

 // add a given AgentHardwareReport to this region's data.
 func (s *statsPerRegion) add(hwrep *api.AgentHardwareReport) {
 	s.ramBytes += uint64(hwrep.Report.MemoryInstalledBytes)
 	for _, cpu := range hwrep.Report.Cpu {
 		s.numThreads += uint64(cpu.HardwareThreads)
 	}
 }

 // regionKey is used to uniquely identify each region per each provider.
 type regionKey struct {
 	provider model.Provider
 	location string
 }

 func (r *regionKey) String() string {
 	return fmt.Sprintf("%s/%s", r.provider, r.location)
 }

 func (h *hwStatsRunner) runOnce(ctx context.Context) error {
 	sess, err := h.s.session(ctx)
 	if err != nil {
 		return err
 	}

 	var start uuid.UUID

 	perRegion := make(map[regionKey]*statsPerRegion)
 	var total statsPerRegion

 	for {
 		var res []model.ListMachineHardwareRow
 		err = sess.Transact(ctx, func(q *model.Queries) error {
 			res, err = q.ListMachineHardware(ctx, model.ListMachineHardwareParams{
 				Limit:     100,
 				MachineID: start,
 			})
 			return err
 		})
 		if err != nil {
 			return err
 		}
 		klog.Infof("Machines: %d chunk", len(res))
 		if len(res) == 0 {
 			break
 		}
 		for _, row := range res {
 			var hwrep api.AgentHardwareReport
 			err = proto.Unmarshal(row.HardwareReportRaw.([]byte), &hwrep)
 			if err != nil {
 				klog.Warningf("Could not decode hardware report from %s: %v", row.MachineID, err)
 				continue
 			}

 			if !row.ProviderLocation.Valid {
 				klog.Warningf("%s has no provider location, skipping", row.MachineID)
 				continue
 			}

 			key := regionKey{
 				provider: row.Provider,
 				location: row.ProviderLocation.String,
 			}
 			if _, ok := perRegion[key]; !ok {
 				perRegion[key] = &statsPerRegion{}
 			}
 			perRegion[key].add(&hwrep)
 			total.add(&hwrep)

 			start = row.MachineID
 		}
 	}

 	for k, st := range perRegion {
 		labels := prometheus.Labels{
 			"provider": string(k.provider),
 			"location": k.location,
 		}

 		h.memoryPerRegion.With(labels).Set(float64(st.ramBytes))
 		h.cpuThreadsPerRegion.With(labels).Set(float64(st.numThreads))
 	}
 	return nil
 }
	package scruffy

	import (
	"context"
	"errors"
	"fmt"
	"time"

	"github.com/google/uuid"
	"github.com/prometheus/client_golang/prometheus"
	"google.golang.org/protobuf/proto"
	"k8s.io/klog/v2"

	"source.monogon.dev/cloud/bmaas/bmdb/model"
	"source.monogon.dev/cloud/bmaas/server/api"
	)

	// hwStatsRunner collects metrics from the machine hardware inventory in BMDB and
	// exposes them as Prometheus metrics via a registry passed to newHWStatsRunner.
	type hwStatsRunner struct {
	s *Server

	memoryPerRegion *prometheus.GaugeVec
	cpuThreadsPerRegion *prometheus.GaugeVec
	}

	// newHWStatsRunner builds a hwStatsRunner. The hwStatsRunner then has the
	// given's Server BMDB connection bound to it and can perform actual database
	// statistic gathering.
	func newHWStatsRunner(s Server, reg prometheus.Registry) *hwStatsRunner {
	hwsr := &hwStatsRunner{
	s: s,

	memoryPerRegion: prometheus.NewGaugeVec(prometheus.GaugeOpts{
	Name: "bmdb_hwstats_region_ram_bytes",
	}, []string{"provider", "location"}),

	cpuThreadsPerRegion: prometheus.NewGaugeVec(prometheus.GaugeOpts{
	Name: "bmdb_hwstats_region_cpu_threads",
	}, []string{"provider", "location"}),
	}
	reg.MustRegister(hwsr.memoryPerRegion, hwsr.cpuThreadsPerRegion)
	return hwsr
	}

	func (h *hwStatsRunner) run(ctx context.Context) {
	klog.Infof("Starting stats runner...")

	ti := time.NewTicker(time.Minute)

	for {
	err := h.runOnce(ctx)
	if err != nil {
	if errors.Is(err, ctx.Err()) {
	return
	}
	klog.Errorf("Stats run failed: %v", err)
	}
	select {
	case <-ti.C:
	case <-ctx.Done():
	klog.Infof("Exiting stats runner (%v)...", ctx.Err())
	return
	}
	}
	}

	// statsPerRegion are gathered and aggregated (summed) per region.
	type statsPerRegion struct {
	ramBytes uint64
	numThreads uint64
	}

	// add a given AgentHardwareReport to this region's data.
	func (s statsPerRegion) add(hwrep api.AgentHardwareReport) {
	s.ramBytes += uint64(hwrep.Report.MemoryInstalledBytes)
	for _, cpu := range hwrep.Report.Cpu {
	s.numThreads += uint64(cpu.HardwareThreads)
	}
	}

	// regionKey is used to uniquely identify each region per each provider.
	type regionKey struct {
	provider model.Provider
	location string
	}

	func (r *regionKey) String() string {
	return fmt.Sprintf("%s/%s", r.provider, r.location)
	}

	func (h *hwStatsRunner) runOnce(ctx context.Context) error {
	sess, err := h.s.session(ctx)
	if err != nil {
	return err
	}

	var start uuid.UUID

	perRegion := make(map[regionKey]*statsPerRegion)
	var total statsPerRegion

	for {
	var res []model.ListMachineHardwareRow
	err = sess.Transact(ctx, func(q *model.Queries) error {
	res, err = q.ListMachineHardware(ctx, model.ListMachineHardwareParams{
	Limit: 100,
	MachineID: start,
	})
	return err
	})
	if err != nil {
	return err
	}
	klog.Infof("Machines: %d chunk", len(res))
	if len(res) == 0 {
	break
	}
	for _, row := range res {
	var hwrep api.AgentHardwareReport
	err = proto.Unmarshal(row.HardwareReportRaw.([]byte), &hwrep)
	if err != nil {
	klog.Warningf("Could not decode hardware report from %s: %v", row.MachineID, err)
	continue
	}

	if !row.ProviderLocation.Valid {
	klog.Warningf("%s has no provider location, skipping", row.MachineID)
	continue
	}

	key := regionKey{
	provider: row.Provider,
	location: row.ProviderLocation.String,
	}
	if _, ok := perRegion[key]; !ok {
	perRegion[key] = &statsPerRegion{}
	}
	perRegion[key].add(&hwrep)
	total.add(&hwrep)

	start = row.MachineID
	}
	}

	for k, st := range perRegion {
	labels := prometheus.Labels{
	"provider": string(k.provider),
	"location": k.location,
	}

	h.memoryPerRegion.With(labels).Set(float64(st.ramBytes))
	h.cpuThreadsPerRegion.With(labels).Set(float64(st.numThreads))
	}
	return nil
	}