blob: d020a778cac83d25d4b4ad61f24bb12aa5fce8d5 [file] [log] [blame]
// Copyright The Monogon Project Authors.
// SPDX-License-Identifier: Apache-2.0
3
// Package scruffy implements Scruffy, The Janitor.
//
// Scruffy is a BMaaS component which runs a bunch of important, housekeeping-ish
// processes that aren't tied to any particular provider and are mostly
// batch-oriented.
//
// Currently Scruffy just collects metrics from the BMDB.
11package scruffy
12
13import (
14 "context"
Tim Windelschmidtd5f851b2024-04-23 14:59:37 +020015 "errors"
Serge Bazanski6f599512023-04-26 19:08:19 +020016 "flag"
17 "fmt"
18 "os"
19 "time"
20
21 "github.com/cenkalti/backoff/v4"
22 "k8s.io/klog/v2"
23
24 "source.monogon.dev/cloud/bmaas/bmdb"
25 "source.monogon.dev/cloud/bmaas/bmdb/metrics"
26 "source.monogon.dev/cloud/bmaas/bmdb/webug"
27 "source.monogon.dev/cloud/lib/component"
28)
29
30type Config struct {
31 Component component.ComponentConfig
32 BMDB bmdb.BMDB
33 Webug webug.Config
34
35 StatsRunnerRate time.Duration
36}
37
// runtimeInfo returns a short, human-readable description of where this
// process is running, in the form "host <hostname>".
//
// The hostname lookup is best-effort: its error is deliberately ignored and an
// empty hostname is reported as "UNKNOWN".
//
// TODO(q3k): factor this out to BMDB library?
func runtimeInfo() string {
	host := "UNKNOWN"
	if name, _ := os.Hostname(); name != "" {
		host = name
	}
	return fmt.Sprintf("host %s", host)
}
46
47func (c *Config) RegisterFlags() {
48 c.Component.RegisterFlags("scruffy")
49 c.BMDB.ComponentName = "scruffy"
50 c.BMDB.RuntimeInfo = runtimeInfo()
51 c.BMDB.Database.RegisterFlags("bmdb")
52 c.Webug.RegisterFlags()
53
54 flag.DurationVar(&c.StatsRunnerRate, "scruffy_stats_collection_rate", time.Minute, "How often the stats collection loop will run against BMDB")
55}
56
57type Server struct {
58 Config Config
59
60 bmdb *bmdb.Connection
61 sessionC chan *bmdb.Session
62}
63
64func (s *Server) Start(ctx context.Context) {
65 reg := s.Config.Component.PrometheusRegistry()
66 s.Config.BMDB.EnableMetrics(reg)
67 s.Config.Component.StartPrometheus(ctx)
68
69 conn, err := s.Config.BMDB.Open(true)
70 if err != nil {
71 klog.Exitf("Failed to connect to BMDB: %v", err)
72 }
73 s.bmdb = conn
74 s.sessionC = make(chan *bmdb.Session)
75 go s.sessionWorker(ctx)
76
77 bsr := newBMDBStatsRunner(s, reg)
78 go bsr.run(ctx)
79
80 hwr := newHWStatsRunner(s, reg)
81 go hwr.run(ctx)
82
83 go func() {
Tim Windelschmidtd5f851b2024-04-23 14:59:37 +020084 if err := s.Config.Webug.Start(ctx, conn); err != nil && !errors.Is(err, ctx.Err()) {
Serge Bazanski6f599512023-04-26 19:08:19 +020085 klog.Exitf("Failed to start webug: %v", err)
86 }
87 }()
88}
89
90// sessionWorker emits a valid BMDB session to sessionC as long as ctx is active.
91//
92// TODO(q3k): factor out into bmdb client lib
93func (s *Server) sessionWorker(ctx context.Context) {
94 var session *bmdb.Session
95 for {
96 if session == nil || session.Expired() {
97 klog.Infof("Starting new session...")
98 bo := backoff.NewExponentialBackOff()
99 err := backoff.Retry(func() error {
100 var err error
101 session, err = s.bmdb.StartSession(ctx, bmdb.SessionOption{Processor: metrics.ProcessorScruffyStats})
102 if err != nil {
103 klog.Errorf("Failed to start session: %v", err)
104 return err
105 } else {
106 return nil
107 }
108 }, backoff.WithContext(bo, ctx))
109 if err != nil {
110 // If something's really wrong just crash.
111 klog.Exitf("Gave up on starting session: %v", err)
112 }
113 klog.Infof("New session: %s", session.UUID)
114 }
115
116 select {
117 case <-ctx.Done():
118 return
119 case s.sessionC <- session:
120 }
121 }
122}
123
124func (s *Server) session(ctx context.Context) (*bmdb.Session, error) {
125 select {
126 case sess := <-s.sessionC:
127 return sess, nil
128 case <-ctx.Done():
129 return nil, ctx.Err()
130 }
131}