blob: 8d836f2ba1742541c115eee9c15b3d77a280948f [file] [log] [blame]
// Copyright 2020 The Monogon Project Authors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
16
17package supervisor
18
// Supporting infrastructure to allow running some non-Go payloads under
// supervision.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010021
22import (
23 "context"
Lorenz Brune3032bd2023-04-13 14:43:41 +020024 "errors"
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010025 "net"
Lorenz Brune3032bd2023-04-13 14:43:41 +020026 "os"
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010027 "os/exec"
Serge Bazanski216fe7b2021-05-21 18:36:16 +020028
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010029 "google.golang.org/grpc"
Serge Bazanski96043bc2021-10-05 12:10:13 +020030
31 "source.monogon.dev/metropolis/pkg/logtree"
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010032)
33
Serge Bazanski216fe7b2021-05-21 18:36:16 +020034// GRPCServer creates a Runnable that serves gRPC requests as longs as it's not
35// canceled.
36// If graceful is set to true, the server will be gracefully stopped instead of
37// plain stopped. This means all pending RPCs will finish, but also requires
38// streaming gRPC handlers to check their context liveliness and exit
39// accordingly. If the server code does not support this, `graceful` should be
40// false and the server will be killed violently instead.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010041func GRPCServer(srv *grpc.Server, lis net.Listener, graceful bool) Runnable {
42 return func(ctx context.Context) error {
43 Signal(ctx, SignalHealthy)
Jan Schär23e52302024-03-21 16:50:15 +010044 defer func() {
45 if graceful {
46 srv.GracefulStop()
47 } else {
48 srv.Stop()
49 }
50 }()
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010051 errC := make(chan error)
52 go func() {
53 errC <- srv.Serve(lis)
54 }()
55 select {
56 case <-ctx.Done():
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010057 return ctx.Err()
58 case err := <-errC:
59 return err
60 }
61 }
62}
63
Serge Bazanski216fe7b2021-05-21 18:36:16 +020064// RunCommand will create a Runnable that starts a long-running command, whose
65// exit is determined to be a failure.
Serge Bazanski05604292021-03-12 17:47:21 +010066func RunCommand(ctx context.Context, cmd *exec.Cmd, opts ...RunCommandOption) error {
Serge Bazanski967be212020-11-02 11:26:59 +010067 Signal(ctx, SignalHealthy)
Serge Bazanski05604292021-03-12 17:47:21 +010068
69 var parseKLog bool
Lorenz Brune3032bd2023-04-13 14:43:41 +020070 var signal <-chan os.Signal
Serge Bazanski05604292021-03-12 17:47:21 +010071 for _, opt := range opts {
72 if opt.parseKlog {
73 parseKLog = true
74 }
Lorenz Brune3032bd2023-04-13 14:43:41 +020075 if opt.signal != nil {
76 signal = opt.signal
77 }
Serge Bazanski05604292021-03-12 17:47:21 +010078 }
79
80 if parseKLog {
81 // We make two klogs, one for each of stdout/stderr. This is to prevent
82 // accidental interleaving of both.
83 klogStdout := logtree.KLogParser(Logger(ctx))
84 defer klogStdout.Close()
85 klogStderr := logtree.KLogParser(Logger(ctx))
86 defer klogStderr.Close()
87
88 cmd.Stdout = klogStdout
89 cmd.Stderr = klogStderr
90 } else {
91 cmd.Stdout = RawLogger(ctx)
92 cmd.Stderr = RawLogger(ctx)
93 }
Lorenz Brune3032bd2023-04-13 14:43:41 +020094 err := cmd.Start()
95 if err != nil {
96 return err
97 }
98
99 exited := make(chan struct{})
100 if signal != nil {
101 go func() {
102 for {
103 var err error
104 select {
105 case s := <-signal:
106 err = cmd.Process.Signal(s)
107 case <-exited:
108 return
109 }
110 if err != nil && !errors.Is(err, os.ErrProcessDone) {
111 Logger(ctx).Warningf("Failed sending signal to process: %v", err)
112 }
113 }
114 }()
115 }
116
117 err = cmd.Wait()
118 if signal != nil {
119 exited <- struct{}{}
120 }
Serge Bazanski967be212020-11-02 11:26:59 +0100121 Logger(ctx).Infof("Command returned: %v", err)
122 return err
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100123}
Serge Bazanski05604292021-03-12 17:47:21 +0100124
// RunCommandOption carries optional settings for RunCommand. Values are
// built with the ParseKLog and SignalChan constructors; the zero value
// enables nothing.
type RunCommandOption struct {
	// parseKlog, when true, makes RunCommand pipe the command's stdout and
	// stderr through klog parsers instead of the raw logger.
	parseKlog bool

	// signal, when non-nil, is a channel from which RunCommand reads signals
	// to forward to the running process.
	signal <-chan os.Signal
}
129
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200130// ParseKLog signals that the command being run will return klog-compatible
131// logs to stdout and/or stderr, and these will be re-interpreted as structured
Serge Bazanski05604292021-03-12 17:47:21 +0100132// logging and emitted to the supervisor's logger.
133func ParseKLog() RunCommandOption {
134 return RunCommandOption{
135 parseKlog: true,
136 }
137}
Lorenz Brune3032bd2023-04-13 14:43:41 +0200138
139// SignalChan takes a channel which can be used to send signals to the
140// supervised process.
141//
142// The given channel will be read from as long as the underlying process is
143// running. If the process doesn't start successfully the channel will not be
144// read. When the process exits, the channel will stop being read.
145//
146// With the above in mind, and also taking into account the inherent lack of
147// reliability in delivering any process-handled signals in POSIX/Linux, it is
148// recommended to use unbuffered channels, always write to them in a non-blocking
149// fashion (eg. in a select { ... default: } block), and to not rely only on the
150// signal delivery mechanism for the intended behaviour.
151//
152// For example, if the signals are used to trigger some configuration reload,
153// these configuration reloads should either be verified and signal delivery should
154// be retried until confirmed successful, or there should be a backup periodic
155// reload performed by the target process independently of signal-based reload
156// triggers.
157//
158// Another example: if the signal delivered is a SIGTERM used to gracefully
159// terminate some process, it should be attempted to be delivered a number of
160// times before finally SIGKILLing the process.
161func SignalChan(s <-chan os.Signal) RunCommandOption {
162 return RunCommandOption{
163 signal: s,
164 }
165}