blob: 2f2da738894dbe0fd022d327a9c929a6ad806338 [file] [log] [blame]
Serge Bazanski54e212a2023-06-14 13:45:11 +02001package metrics
2
3import (
4 "context"
5 "crypto/tls"
6 "crypto/x509"
7 "fmt"
8 "net"
9 "net/http"
10 "os/exec"
11
12 "source.monogon.dev/metropolis/node"
13 "source.monogon.dev/metropolis/node/core/identity"
14 "source.monogon.dev/metropolis/pkg/supervisor"
15)
16
17// Service is the Metropolis Metrics Service.
18//
19// Currently, metrics means Prometheus metrics.
20//
21// It runs a forwarding proxy from a public HTTPS listener to a number of
22// locally-running exporters, themselves listening over HTTP. The listener uses
23// the main cluster CA and the node's main certificate, authenticating incoming
24// connections with the same CA.
25//
26// Each exporter is exposed on a separate path, /metrics/<name>, where <name> is
27// the name of the exporter.
28//
29// The HTTPS listener is bound to node.MetricsPort.
30type Service struct {
31 // Credentials used to run the TLS/HTTPS listener and verify incoming
32 // connections.
33 Credentials *identity.NodeCredentials
34 // List of Exporters to run and to forward HTTP requests to. If not set, defaults
35 // to DefaultExporters.
36 Exporters []Exporter
37
38 // enableDynamicAddr enables listening on a dynamically chosen TCP port. This is
39 // used by tests to make sure we don't fail due to the default port being already
40 // in use.
41 enableDynamicAddr bool
42 // dynamicAddr will contain the picked dynamic listen address after the service
43 // starts, if enableDynamicAddr is set.
44 dynamicAddr chan string
45}
46
47// listen starts the public TLS listener for the service.
48func (s *Service) listen() (net.Listener, error) {
49 cert := s.Credentials.TLSCredentials()
50
51 pool := x509.NewCertPool()
52 pool.AddCert(s.Credentials.ClusterCA())
53
54 tlsc := tls.Config{
55 Certificates: []tls.Certificate{
56 cert,
57 },
58 ClientAuth: tls.RequireAndVerifyClientCert,
59 ClientCAs: pool,
60 // TODO(q3k): use VerifyPeerCertificate/VerifyConnection to check that the
61 // incoming client is allowed to access metrics. Currently we allow
62 // anyone/anything with a valid cluster certificate to access them.
63 }
64
65 addr := net.JoinHostPort("", node.MetricsPort.PortString())
66 if s.enableDynamicAddr {
67 addr = ""
68 }
69 return tls.Listen("tcp", addr, &tlsc)
70}
71
72func (s *Service) Run(ctx context.Context) error {
73 lis, err := s.listen()
74 if err != nil {
75 return fmt.Errorf("listen failed: %w", err)
76 }
77 if s.enableDynamicAddr {
78 s.dynamicAddr <- lis.Addr().String()
79 }
80
81 if s.Exporters == nil {
82 s.Exporters = DefaultExporters
83 }
84
85 // First, make sure we don't have duplicate exporters.
86 seenNames := make(map[string]bool)
87 for _, exporter := range s.Exporters {
88 if seenNames[exporter.Name] {
89 return fmt.Errorf("duplicate exporter name: %q", exporter.Name)
90 }
91 seenNames[exporter.Name] = true
92 }
93
94 // Start all exporters as sub-runnables.
95 for _, exporter := range s.Exporters {
96 cmd := exec.CommandContext(ctx, exporter.Executable, exporter.Arguments...)
97 err := supervisor.Run(ctx, exporter.Name, func(ctx context.Context) error {
98 return supervisor.RunCommand(ctx, cmd)
99 })
100 if err != nil {
101 return fmt.Errorf("running %s failed: %w", exporter.Name, err)
102 }
103
104 }
105
106 // And register all exporter forwarding functions on a mux.
107 mux := http.NewServeMux()
108 logger := supervisor.Logger(ctx)
109 for _, exporter := range s.Exporters {
110 exporter := exporter
111
112 mux.HandleFunc(exporter.externalPath(), func(w http.ResponseWriter, r *http.Request) {
113 exporter.forward(logger, w, r)
114 })
115
116 logger.Infof("Registered exporter %q", exporter.Name)
117 }
118
119 supervisor.Signal(ctx, supervisor.SignalHealthy)
120
121 // Start forwarding server.
122 srv := http.Server{
123 Handler: mux,
124 BaseContext: func(_ net.Listener) context.Context {
125 return ctx
126 },
127 }
128
129 go func() {
130 <-ctx.Done()
131 srv.Close()
132 }()
133
134 err = srv.Serve(lis)
135 if err != nil && ctx.Err() != nil {
136 return ctx.Err()
137 }
138 return fmt.Errorf("Serve: %w", err)
139}
140
// sdTarget is a single target group, serialized to JSON for service
// discovery.
//
// NOTE(review): the type name suggests this is consumed by Prometheus
// HTTP/file service discovery, which requires the list of addresses to be
// stored under the "targets" key - confirm against the consumer of this type.
type sdTarget struct {
	// Targets is the list of target addresses in this group.
	Targets []string `json:"targets"`
}