blob: c554c089ba01fecafe24e05f91e634a80eb04821 [file] [log] [blame]
Serge Bazanski9c09c4e2020-03-24 13:58:01 +01001// Copyright 2020 The Monogon Project Authors.
2//
3// SPDX-License-Identifier: Apache-2.0
4//
5// Licensed under the Apache License, Version 2.0 (the "License");
6// you may not use this file except in compliance with the License.
7// You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing, software
12// distributed under the License is distributed on an "AS IS" BASIS,
13// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// See the License for the specific language governing permissions and
15// limitations under the License.
16
17package supervisor
18
Serge Bazanski216fe7b2021-05-21 18:36:16 +020019// Supporting infrastructure to allow running some non-Go payloads under
20// supervision.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010021
22import (
23 "context"
Lorenz Brune3032bd2023-04-13 14:43:41 +020024 "errors"
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010025 "net"
Lorenz Brune3032bd2023-04-13 14:43:41 +020026 "os"
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010027 "os/exec"
Serge Bazanski216fe7b2021-05-21 18:36:16 +020028
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010029 "google.golang.org/grpc"
Serge Bazanski96043bc2021-10-05 12:10:13 +020030
Tim Windelschmidt9f21f532024-05-07 15:14:20 +020031 "source.monogon.dev/osbase/logtree"
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010032)
33
Serge Bazanski216fe7b2021-05-21 18:36:16 +020034// GRPCServer creates a Runnable that serves gRPC requests as longs as it's not
35// canceled.
36// If graceful is set to true, the server will be gracefully stopped instead of
37// plain stopped. This means all pending RPCs will finish, but also requires
38// streaming gRPC handlers to check their context liveliness and exit
39// accordingly. If the server code does not support this, `graceful` should be
40// false and the server will be killed violently instead.
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010041func GRPCServer(srv *grpc.Server, lis net.Listener, graceful bool) Runnable {
42 return func(ctx context.Context) error {
43 Signal(ctx, SignalHealthy)
Jan Schär23e52302024-03-21 16:50:15 +010044 defer func() {
45 if graceful {
46 srv.GracefulStop()
47 } else {
48 srv.Stop()
49 }
50 }()
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010051 errC := make(chan error)
52 go func() {
53 errC <- srv.Serve(lis)
54 }()
55 select {
56 case <-ctx.Done():
Serge Bazanski9c09c4e2020-03-24 13:58:01 +010057 return ctx.Err()
58 case err := <-errC:
59 return err
60 }
61 }
62}
63
Serge Bazanski216fe7b2021-05-21 18:36:16 +020064// RunCommand will create a Runnable that starts a long-running command, whose
65// exit is determined to be a failure.
Jan Schärbdbb9c22024-12-18 15:14:02 +010066// cmd should be created with [exec.CommandContext] so that it will be killed
67// when the context is canceled.
Serge Bazanski05604292021-03-12 17:47:21 +010068func RunCommand(ctx context.Context, cmd *exec.Cmd, opts ...RunCommandOption) error {
Serge Bazanski967be212020-11-02 11:26:59 +010069 Signal(ctx, SignalHealthy)
Serge Bazanski05604292021-03-12 17:47:21 +010070
71 var parseKLog bool
Lorenz Brune3032bd2023-04-13 14:43:41 +020072 var signal <-chan os.Signal
Serge Bazanski05604292021-03-12 17:47:21 +010073 for _, opt := range opts {
74 if opt.parseKlog {
75 parseKLog = true
76 }
Lorenz Brune3032bd2023-04-13 14:43:41 +020077 if opt.signal != nil {
78 signal = opt.signal
79 }
Serge Bazanski05604292021-03-12 17:47:21 +010080 }
81
82 if parseKLog {
83 // We make two klogs, one for each of stdout/stderr. This is to prevent
84 // accidental interleaving of both.
85 klogStdout := logtree.KLogParser(Logger(ctx))
86 defer klogStdout.Close()
87 klogStderr := logtree.KLogParser(Logger(ctx))
88 defer klogStderr.Close()
89
90 cmd.Stdout = klogStdout
91 cmd.Stderr = klogStderr
92 } else {
93 cmd.Stdout = RawLogger(ctx)
94 cmd.Stderr = RawLogger(ctx)
95 }
Lorenz Brune3032bd2023-04-13 14:43:41 +020096 err := cmd.Start()
97 if err != nil {
98 return err
99 }
100
101 exited := make(chan struct{})
102 if signal != nil {
103 go func() {
104 for {
105 var err error
106 select {
107 case s := <-signal:
108 err = cmd.Process.Signal(s)
109 case <-exited:
110 return
111 }
112 if err != nil && !errors.Is(err, os.ErrProcessDone) {
113 Logger(ctx).Warningf("Failed sending signal to process: %v", err)
114 }
115 }
116 }()
117 }
118
119 err = cmd.Wait()
120 if signal != nil {
121 exited <- struct{}{}
122 }
Serge Bazanski967be212020-11-02 11:26:59 +0100123 Logger(ctx).Infof("Command returned: %v", err)
124 return err
Serge Bazanski9c09c4e2020-03-24 13:58:01 +0100125}
Serge Bazanski05604292021-03-12 17:47:21 +0100126
// RunCommandOption is an optional setting accepted by RunCommand. Values are
// built with ParseKLog and SignalChan.
type RunCommandOption struct {
	// parseKlog requests that the command's stdout and stderr be parsed as
	// klog output instead of being logged raw.
	parseKlog bool
	// signal, if non-nil, is a channel of signals which RunCommand forwards
	// to the running process.
	signal <-chan os.Signal
}
131
Serge Bazanski216fe7b2021-05-21 18:36:16 +0200132// ParseKLog signals that the command being run will return klog-compatible
133// logs to stdout and/or stderr, and these will be re-interpreted as structured
Serge Bazanski05604292021-03-12 17:47:21 +0100134// logging and emitted to the supervisor's logger.
135func ParseKLog() RunCommandOption {
136 return RunCommandOption{
137 parseKlog: true,
138 }
139}
Lorenz Brune3032bd2023-04-13 14:43:41 +0200140
141// SignalChan takes a channel which can be used to send signals to the
142// supervised process.
143//
144// The given channel will be read from as long as the underlying process is
145// running. If the process doesn't start successfully the channel will not be
146// read. When the process exits, the channel will stop being read.
147//
148// With the above in mind, and also taking into account the inherent lack of
149// reliability in delivering any process-handled signals in POSIX/Linux, it is
150// recommended to use unbuffered channels, always write to them in a non-blocking
151// fashion (eg. in a select { ... default: } block), and to not rely only on the
152// signal delivery mechanism for the intended behaviour.
153//
154// For example, if the signals are used to trigger some configuration reload,
155// these configuration reloads should either be verified and signal delivery should
156// be retried until confirmed successful, or there should be a backup periodic
157// reload performed by the target process independently of signal-based reload
158// triggers.
159//
160// Another example: if the signal delivered is a SIGTERM used to gracefully
161// terminate some process, it should be attempted to be delivered a number of
162// times before finally SIGKILLing the process.
163func SignalChan(s <-chan os.Signal) RunCommandOption {
164 return RunCommandOption{
165 signal: s,
166 }
167}