blob: 01d22bff159b71448644457a1c79d4d0b22eefd1 [file] [log] [blame]
Serge Bazanski6f599512023-04-26 19:08:19 +02001// Package scruffy implements Scruffy, The Janitor.
2//
3// Scruffy is a BMaaS component which runs a bunch of important, housekeeping-ish
4// processes that aren't tied to any particular provider and are mostly
5// batch-oriented.
6//
7// Currently Scruffy just collects metrics from the BMDB.
8package scruffy
9
10import (
11 "context"
Tim Windelschmidtd5f851b2024-04-23 14:59:37 +020012 "errors"
Serge Bazanski6f599512023-04-26 19:08:19 +020013 "flag"
14 "fmt"
15 "os"
16 "time"
17
18 "github.com/cenkalti/backoff/v4"
19 "k8s.io/klog/v2"
20
21 "source.monogon.dev/cloud/bmaas/bmdb"
22 "source.monogon.dev/cloud/bmaas/bmdb/metrics"
23 "source.monogon.dev/cloud/bmaas/bmdb/webug"
24 "source.monogon.dev/cloud/lib/component"
25)
26
27type Config struct {
28 Component component.ComponentConfig
29 BMDB bmdb.BMDB
30 Webug webug.Config
31
32 StatsRunnerRate time.Duration
33}
34
// runtimeInfo returns a short description of where this process is running,
// formatted as "host <hostname>". When the hostname cannot be determined the
// placeholder "UNKNOWN" is used instead.
//
// TODO(q3k): factor this out to BMDB library?
func runtimeInfo() string {
	host := "UNKNOWN"
	// The lookup error is deliberately ignored: a failed lookup simply leaves
	// the placeholder in place.
	if name, _ := os.Hostname(); name != "" {
		host = name
	}
	return fmt.Sprintf("host %s", host)
}
43
44func (c *Config) RegisterFlags() {
45 c.Component.RegisterFlags("scruffy")
46 c.BMDB.ComponentName = "scruffy"
47 c.BMDB.RuntimeInfo = runtimeInfo()
48 c.BMDB.Database.RegisterFlags("bmdb")
49 c.Webug.RegisterFlags()
50
51 flag.DurationVar(&c.StatsRunnerRate, "scruffy_stats_collection_rate", time.Minute, "How often the stats collection loop will run against BMDB")
52}
53
54type Server struct {
55 Config Config
56
57 bmdb *bmdb.Connection
58 sessionC chan *bmdb.Session
59}
60
61func (s *Server) Start(ctx context.Context) {
62 reg := s.Config.Component.PrometheusRegistry()
63 s.Config.BMDB.EnableMetrics(reg)
64 s.Config.Component.StartPrometheus(ctx)
65
66 conn, err := s.Config.BMDB.Open(true)
67 if err != nil {
68 klog.Exitf("Failed to connect to BMDB: %v", err)
69 }
70 s.bmdb = conn
71 s.sessionC = make(chan *bmdb.Session)
72 go s.sessionWorker(ctx)
73
74 bsr := newBMDBStatsRunner(s, reg)
75 go bsr.run(ctx)
76
77 hwr := newHWStatsRunner(s, reg)
78 go hwr.run(ctx)
79
80 go func() {
Tim Windelschmidtd5f851b2024-04-23 14:59:37 +020081 if err := s.Config.Webug.Start(ctx, conn); err != nil && !errors.Is(err, ctx.Err()) {
Serge Bazanski6f599512023-04-26 19:08:19 +020082 klog.Exitf("Failed to start webug: %v", err)
83 }
84 }()
85}
86
87// sessionWorker emits a valid BMDB session to sessionC as long as ctx is active.
88//
89// TODO(q3k): factor out into bmdb client lib
90func (s *Server) sessionWorker(ctx context.Context) {
91 var session *bmdb.Session
92 for {
93 if session == nil || session.Expired() {
94 klog.Infof("Starting new session...")
95 bo := backoff.NewExponentialBackOff()
96 err := backoff.Retry(func() error {
97 var err error
98 session, err = s.bmdb.StartSession(ctx, bmdb.SessionOption{Processor: metrics.ProcessorScruffyStats})
99 if err != nil {
100 klog.Errorf("Failed to start session: %v", err)
101 return err
102 } else {
103 return nil
104 }
105 }, backoff.WithContext(bo, ctx))
106 if err != nil {
107 // If something's really wrong just crash.
108 klog.Exitf("Gave up on starting session: %v", err)
109 }
110 klog.Infof("New session: %s", session.UUID)
111 }
112
113 select {
114 case <-ctx.Done():
115 return
116 case s.sessionC <- session:
117 }
118 }
119}
120
121func (s *Server) session(ctx context.Context) (*bmdb.Session, error) {
122 select {
123 case sess := <-s.sessionC:
124 return sess, nil
125 case <-ctx.Done():
126 return nil, ctx.Err()
127 }
128}