Serge Bazanski | 0ab4eda | 2021-03-12 17:43:57 +0100 | [diff] [blame] | 1 | // Copyright 2020 The Monogon Project Authors. |
| 2 | // |
| 3 | // SPDX-License-Identifier: Apache-2.0 |
| 4 | // |
| 5 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | // you may not use this file except in compliance with the License. |
| 7 | // You may obtain a copy of the License at |
| 8 | // |
| 9 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | // |
| 11 | // Unless required by applicable law or agreed to in writing, software |
| 12 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | // See the License for the specific language governing permissions and |
| 15 | // limitations under the License. |
| 16 | |
| 17 | package logtree |
| 18 | |
| 19 | import ( |
| 20 | "fmt" |
| 21 | "io" |
| 22 | "regexp" |
Serge Bazanski | 0ab4eda | 2021-03-12 17:43:57 +0100 | [diff] [blame] | 23 | "strconv" |
| 24 | "strings" |
| 25 | "time" |
Serge Bazanski | 96043bc | 2021-10-05 12:10:13 +0200 | [diff] [blame] | 26 | |
| 27 | "source.monogon.dev/metropolis/pkg/logbuffer" |
Serge Bazanski | 0ab4eda | 2021-03-12 17:43:57 +0100 | [diff] [blame] | 28 | ) |
| 29 | |
| 30 | // KLogParser returns an io.WriteCloser to which raw logging from a klog emitter |
| 31 | // can be piped. It will attempt to parse all lines from this log as |
| 32 | // glog/klog-style entries, and pass them over to a LeveledLogger as if they were |
| 33 | // emitted locally. |
| 34 | // |
| 35 | // This allows for piping in external processes that emit klog logging into a |
| 36 | // logtree, leading to niceties like transparently exposing the log severity or |
| 37 | // source file/line. |
| 38 | // |
| 39 | // One caveat, however, is that V-leveled logs will not be translated |
| 40 | // appropriately - anything that the klog-emitter pumps out as Info will be |
| 41 | // directly ingested as Info logging. There is no way to work around this. |
| 42 | // |
| 43 | // Another important limitation is that any written line is interpreted as having |
| 44 | // happened recently (ie. within one hour of the time of execution of this |
| 45 | // function). This is important as klog/glog-formatted loglines don't have a year |
| 46 | // attached, so we have to infer it based on the current timestamp (note: parsed |
| 47 | // lines do not necessarily have their year aleays equal to the current year, as |
| 48 | // the code handles the edge case of parsing a line from the end of a previous |
| 49 | // year at the beginning of the next). |
| 50 | func KLogParser(logger LeveledLogger) io.WriteCloser { |
Serge Bazanski | d9775a6 | 2022-02-15 13:28:55 +0100 | [diff] [blame] | 51 | p, ok := logger.(*leveledPublisher) |
Serge Bazanski | 0ab4eda | 2021-03-12 17:43:57 +0100 | [diff] [blame] | 52 | if !ok { |
| 53 | // Fail fast, as this is a programming error. |
Serge Bazanski | d9775a6 | 2022-02-15 13:28:55 +0100 | [diff] [blame] | 54 | panic("Expected *leveledPublisher in LeveledLogger from supervisor") |
Serge Bazanski | 0ab4eda | 2021-03-12 17:43:57 +0100 | [diff] [blame] | 55 | } |
| 56 | |
| 57 | k := &klogParser{ |
Serge Bazanski | d9775a6 | 2022-02-15 13:28:55 +0100 | [diff] [blame] | 58 | publisher: p, |
Serge Bazanski | 0ab4eda | 2021-03-12 17:43:57 +0100 | [diff] [blame] | 59 | } |
| 60 | // klog seems to have no line length limit. Let's assume some sane sort of default. |
| 61 | k.buffer = logbuffer.NewLineBuffer(1024, k.consumeLine) |
| 62 | return k |
| 63 | } |
| 64 | |
Serge Bazanski | 0ab4eda | 2021-03-12 17:43:57 +0100 | [diff] [blame] | 65 | type klogParser struct { |
Serge Bazanski | d9775a6 | 2022-02-15 13:28:55 +0100 | [diff] [blame] | 66 | publisher *leveledPublisher |
| 67 | buffer *logbuffer.LineBuffer |
Serge Bazanski | 0ab4eda | 2021-03-12 17:43:57 +0100 | [diff] [blame] | 68 | } |
| 69 | |
| 70 | func (k *klogParser) Write(p []byte) (n int, err error) { |
| 71 | return k.buffer.Write(p) |
| 72 | } |
| 73 | |
| 74 | // Close must be called exactly once after the parser is done being used. It will |
| 75 | // pipe any leftover data in its write buffer as one last line to parse. |
| 76 | func (k *klogParser) Close() error { |
| 77 | return k.buffer.Close() |
| 78 | } |
| 79 | |
| 80 | // consumeLine is called by the internal LineBuffer any time a new line is fully |
| 81 | // written. |
| 82 | func (k *klogParser) consumeLine(l *logbuffer.Line) { |
| 83 | p := parse(time.Now(), l.Data) |
| 84 | if p == nil { |
| 85 | // We could instead emit that line as a raw log - however, this would lead to |
| 86 | // interleaving raw logging and leveled logging. |
Serge Bazanski | d9775a6 | 2022-02-15 13:28:55 +0100 | [diff] [blame] | 87 | k.publisher.Errorf("Invalid klog line: %s", l.Data) |
Serge Bazanski | 5456a3c | 2023-03-16 00:29:17 +0100 | [diff] [blame] | 88 | return |
Serge Bazanski | 0ab4eda | 2021-03-12 17:43:57 +0100 | [diff] [blame] | 89 | } |
| 90 | // TODO(q3k): should this be exposed as an API on LeveledLogger? How much should |
| 91 | // we permit library users to 'fake' logs? This would also permit us to get rid |
| 92 | // of the type assertion in KLogParser(). |
| 93 | e := &entry{ |
Serge Bazanski | d9775a6 | 2022-02-15 13:28:55 +0100 | [diff] [blame] | 94 | origin: k.publisher.node.dn, |
Serge Bazanski | 0ab4eda | 2021-03-12 17:43:57 +0100 | [diff] [blame] | 95 | leveled: p, |
| 96 | } |
Serge Bazanski | d9775a6 | 2022-02-15 13:28:55 +0100 | [diff] [blame] | 97 | k.publisher.node.tree.journal.append(e) |
| 98 | k.publisher.node.tree.journal.notify(e) |
Serge Bazanski | 0ab4eda | 2021-03-12 17:43:57 +0100 | [diff] [blame] | 99 | } |
| 100 | |
| 101 | var ( |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 102 | // reKLog matches and parses klog/glog-formatted log lines. Format: I0312 |
| 103 | // 14:20:04.240540 204 shared_informer.go:247] Caches are synced for attach |
| 104 | // detach |
Serge Bazanski | 0ab4eda | 2021-03-12 17:43:57 +0100 | [diff] [blame] | 105 | reKLog = regexp.MustCompile(`^([IEWF])(\d{4})\s+(\d{2}:\d{2}:\d{2}(\.\d+)?)\s+(\d+)\s+([^:]+):(\d+)]\s+(.+)$`) |
| 106 | ) |
| 107 | |
| 108 | // parse attempts to parse a klog-formatted line. Returns nil if the line |
| 109 | // couldn't have been parsed successfully. |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 110 | func parse(now time.Time, s string) *LeveledPayload { |
Serge Bazanski | 0ab4eda | 2021-03-12 17:43:57 +0100 | [diff] [blame] | 111 | parts := reKLog.FindStringSubmatch(s) |
| 112 | if parts == nil { |
| 113 | return nil |
| 114 | } |
| 115 | |
| 116 | severityS := parts[1] |
| 117 | date := parts[2] |
| 118 | timestamp := parts[3] |
| 119 | pid := parts[5] |
| 120 | file := parts[6] |
| 121 | lineS := parts[7] |
| 122 | message := parts[8] |
| 123 | |
| 124 | var severity Severity |
| 125 | switch severityS { |
| 126 | case "I": |
| 127 | severity = INFO |
| 128 | case "W": |
| 129 | severity = WARNING |
| 130 | case "E": |
| 131 | severity = ERROR |
| 132 | case "F": |
| 133 | severity = FATAL |
| 134 | default: |
| 135 | return nil |
| 136 | } |
| 137 | |
| 138 | // Possible race due to klog's/glog's format not containing a year. |
| 139 | // On 2020/12/31 at 23:59:59.99999 a klog logger emits this line: |
| 140 | // |
| 141 | // I1231 23:59:59.99999 1 example.go:10] It's almost 2021! Hooray. |
| 142 | // |
| 143 | // Then, if this library parses that line at 2021/01/01 00:00:00.00001, the |
| 144 | // time will be interpreted as: |
| 145 | // |
| 146 | // 2021/12/31 23:59:59 |
| 147 | // |
| 148 | // So around one year in the future. We attempt to fix this case further down in |
| 149 | // this function. |
| 150 | year := now.Year() |
| 151 | ts, err := parseKLogTime(year, date, timestamp) |
| 152 | if err != nil { |
| 153 | return nil |
| 154 | } |
| 155 | |
| 156 | // Attempt to fix the aforementioned year-in-the-future issue. |
| 157 | if ts.After(now) && ts.Sub(now) > time.Hour { |
| 158 | // Parsed timestamp is in the future. How close is it to One-Year-From-Now? |
| 159 | oyfn := now.Add(time.Hour * 24 * 365) |
| 160 | dOyfn := ts.Sub(oyfn) |
| 161 | // Let's make sure Duration-To-One-Year-From-Now is always positive. This |
| 162 | // simplifies the rest of the checks and papers over some possible edge cases. |
| 163 | if dOyfn < 0 { |
| 164 | dOyfn = -dOyfn |
| 165 | } |
| 166 | |
| 167 | // Okay, is that very close? Then the issue above happened and we should |
| 168 | // attempt to reparse it with last year. We can't just manipulate the date we |
| 169 | // already have, as it's difficult to 'subtract one year'. |
| 170 | if dOyfn < (time.Hour * 24 * 2) { |
| 171 | ts, err = parseKLogTime(year-1, date, timestamp) |
| 172 | if err != nil { |
| 173 | return nil |
| 174 | } |
| 175 | } else { |
| 176 | // Otherwise, we received some seriously time traveling log entry. Abort. |
| 177 | return nil |
| 178 | } |
| 179 | } |
| 180 | |
| 181 | line, err := strconv.Atoi(lineS) |
| 182 | if err != nil { |
| 183 | return nil |
| 184 | } |
| 185 | |
| 186 | // The PID is discarded. |
| 187 | _ = pid |
| 188 | |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 189 | // Finally we have extracted all the data from the line. Inject into the log |
| 190 | // publisher. |
Serge Bazanski | 0ab4eda | 2021-03-12 17:43:57 +0100 | [diff] [blame] | 191 | return &LeveledPayload{ |
| 192 | timestamp: ts, |
Serge Bazanski | 216fe7b | 2021-05-21 18:36:16 +0200 | [diff] [blame] | 193 | severity: severity, |
| 194 | messages: []string{message}, |
| 195 | file: file, |
| 196 | line: line, |
Serge Bazanski | 0ab4eda | 2021-03-12 17:43:57 +0100 | [diff] [blame] | 197 | } |
| 198 | } |
| 199 | |
| 200 | // parseKLogTime parses a klog date and time (eg. "0314", "12:13:14.12345") into |
| 201 | // a time.Time happening at a given year. |
| 202 | func parseKLogTime(year int, d, t string) (time.Time, error) { |
| 203 | var layout string |
| 204 | if strings.Contains(t, ".") { |
| 205 | layout = "2006 0102 15:04:05.000000" |
| 206 | } else { |
| 207 | layout = "2006 0102 15:04:05" |
| 208 | } |
| 209 | // Make up a string that contains the current year. This permits us to parse |
| 210 | // fully into an actual timestamp. |
| 211 | // TODO(q3k): add a timezone? This currently behaves as UTC, which is probably |
| 212 | // what we want, but we should formalize this. |
| 213 | return time.Parse(layout, fmt.Sprintf("%d %s %s", year, d, t)) |
| 214 | } |