blob: 58e0f51e153a1d69ed9fef38a59a219f4c8fd29d [file] [log] [blame]
// Package scruffy implements Scruffy, The Janitor.
//
// Scruffy is a BMaaS component which runs a bunch of important, housekeeping-ish
// processes that aren't tied to any particular provider and are mostly
// batch-oriented.
//
// Currently Scruffy just collects metrics from the BMDB.
8package scruffy
9
import (
	"context"
	"errors"
	"flag"
	"fmt"
	"os"
	"time"

	"github.com/cenkalti/backoff/v4"
	"k8s.io/klog/v2"

	"source.monogon.dev/cloud/bmaas/bmdb"
	"source.monogon.dev/cloud/bmaas/bmdb/metrics"
	"source.monogon.dev/cloud/bmaas/bmdb/webug"
	"source.monogon.dev/cloud/lib/component"
)
25
26type Config struct {
27 Component component.ComponentConfig
28 BMDB bmdb.BMDB
29 Webug webug.Config
30
31 StatsRunnerRate time.Duration
32}
33
// runtimeInfo returns a short description of where this process runs, in the
// form "host <hostname>". When os.Hostname yields an empty name, the literal
// "UNKNOWN" is used in its place.
//
// TODO(q3k): factor this out to BMDB library?
func runtimeInfo() string {
	name := "UNKNOWN"
	// The lookup error is deliberately dropped: an empty result already
	// leaves the placeholder above in effect.
	if h, _ := os.Hostname(); h != "" {
		name = h
	}
	return fmt.Sprintf("host %s", name)
}
42
43func (c *Config) RegisterFlags() {
44 c.Component.RegisterFlags("scruffy")
45 c.BMDB.ComponentName = "scruffy"
46 c.BMDB.RuntimeInfo = runtimeInfo()
47 c.BMDB.Database.RegisterFlags("bmdb")
48 c.Webug.RegisterFlags()
49
50 flag.DurationVar(&c.StatsRunnerRate, "scruffy_stats_collection_rate", time.Minute, "How often the stats collection loop will run against BMDB")
51}
52
53type Server struct {
54 Config Config
55
56 bmdb *bmdb.Connection
57 sessionC chan *bmdb.Session
58}
59
60func (s *Server) Start(ctx context.Context) {
61 reg := s.Config.Component.PrometheusRegistry()
62 s.Config.BMDB.EnableMetrics(reg)
63 s.Config.Component.StartPrometheus(ctx)
64
65 conn, err := s.Config.BMDB.Open(true)
66 if err != nil {
67 klog.Exitf("Failed to connect to BMDB: %v", err)
68 }
69 s.bmdb = conn
70 s.sessionC = make(chan *bmdb.Session)
71 go s.sessionWorker(ctx)
72
73 bsr := newBMDBStatsRunner(s, reg)
74 go bsr.run(ctx)
75
76 hwr := newHWStatsRunner(s, reg)
77 go hwr.run(ctx)
78
79 go func() {
80 if err := s.Config.Webug.Start(ctx, conn); err != nil && err != ctx.Err() {
81 klog.Exitf("Failed to start webug: %v", err)
82 }
83 }()
84}
85
86// sessionWorker emits a valid BMDB session to sessionC as long as ctx is active.
87//
88// TODO(q3k): factor out into bmdb client lib
89func (s *Server) sessionWorker(ctx context.Context) {
90 var session *bmdb.Session
91 for {
92 if session == nil || session.Expired() {
93 klog.Infof("Starting new session...")
94 bo := backoff.NewExponentialBackOff()
95 err := backoff.Retry(func() error {
96 var err error
97 session, err = s.bmdb.StartSession(ctx, bmdb.SessionOption{Processor: metrics.ProcessorScruffyStats})
98 if err != nil {
99 klog.Errorf("Failed to start session: %v", err)
100 return err
101 } else {
102 return nil
103 }
104 }, backoff.WithContext(bo, ctx))
105 if err != nil {
106 // If something's really wrong just crash.
107 klog.Exitf("Gave up on starting session: %v", err)
108 }
109 klog.Infof("New session: %s", session.UUID)
110 }
111
112 select {
113 case <-ctx.Done():
114 return
115 case s.sessionC <- session:
116 }
117 }
118}
119
120func (s *Server) session(ctx context.Context) (*bmdb.Session, error) {
121 select {
122 case sess := <-s.sessionC:
123 return sess, nil
124 case <-ctx.Done():
125 return nil, ctx.Err()
126 }
127}