blob: da50749e3504d17fb722a8e8b10c9ac2588f9831 [file] [log] [blame]
Lorenz Brun62948542023-01-10 13:28:44 +00001package main
2
3import (
4 "bufio"
5 "bytes"
6 "fmt"
7 "math"
8 "os"
9 "path/filepath"
10 "regexp"
11 "runtime"
12 "sort"
13 "strconv"
14 "strings"
15
16 "github.com/mdlayher/ethtool"
17 "github.com/vishvananda/netlink"
18 "golang.org/x/sys/unix"
19
20 "source.monogon.dev/cloud/agent/api"
Tim Windelschmidt9f21f532024-05-07 15:14:20 +020021 "source.monogon.dev/osbase/nvme"
22 "source.monogon.dev/osbase/scsi"
23 "source.monogon.dev/osbase/smbios"
Lorenz Brun62948542023-01-10 13:28:44 +000024)
25
26type hwReportContext struct {
27 node *api.Node
28 errors []error
29}
30
31func (c *hwReportContext) gatherSMBIOS() {
32 smbiosFile, err := os.Open("/sys/firmware/dmi/tables/DMI")
33 if err != nil {
34 c.errors = append(c.errors, fmt.Errorf("unable to open SMBIOS table: %w", err))
35 return
36 }
37 defer smbiosFile.Close()
38 smbTbl, err := smbios.Unmarshal(bufio.NewReader(smbiosFile))
39 if err != nil {
40 c.errors = append(c.errors, fmt.Errorf("unable to parse SMBIOS table: %w", err))
41 return
42 }
43 if smbTbl.SystemInformationRaw != nil {
44 c.node.Manufacturer = smbTbl.SystemInformationRaw.Manufacturer
45 c.node.Product = smbTbl.SystemInformationRaw.ProductName
46 c.node.SerialNumber = smbTbl.SystemInformationRaw.SerialNumber
47 }
Lorenz Brun1cd26962023-04-19 16:10:17 +020048 if smbTbl.BIOSInformationRaw != nil && smbTbl.BIOSInformationRaw.StructureVersion.AtLeast(2, 2) {
49 uefiSupport := smbTbl.BIOSInformationRaw.BIOSCharacteristicsExtensionByte2&smbios.UEFISpecificationSupported != 0
50 if uefiSupport {
51 c.node.EfiSupport = api.EFISupport_EFI_SUPPORTED
52 } else {
53 c.node.EfiSupport = api.EFISupport_EFI_UNSUPPORTED
54 }
55 }
Lorenz Brun62948542023-01-10 13:28:44 +000056 for _, d := range smbTbl.MemoryDevicesRaw {
57 if d.StructureVersion.AtLeast(3, 2) && d.MemoryTechnology != 0x03 {
58 // If MemoryTechnology is available, only count DRAM
59 continue
60 }
61 size, ok := d.SizeBytes()
62 if !ok {
63 continue
64 }
65 c.node.MemoryInstalledBytes += int64(size)
66 }
Lorenz Brun62948542023-01-10 13:28:44 +000067}
68
// memoryBlockRegexp matches the sysfs directory names of memory blocks, i.e.
// "memory" followed by one or more decimal digits and nothing else.
var memoryBlockRegexp = regexp.MustCompile("^memory[0-9]+$")
70
71func (c *hwReportContext) gatherMemorySysfs() {
72 blockSizeRaw, err := os.ReadFile("/sys/devices/system/memory/block_size_bytes")
73 if err != nil {
74 c.errors = append(c.errors, fmt.Errorf("unable to read memory block size, CONFIG_MEMORY_HOTPLUG disabled or sandbox?: %w", err))
75 return
76 }
77 blockSize, err := strconv.ParseInt(strings.TrimSpace(string(blockSizeRaw)), 16, 64)
78 if err != nil {
79 c.errors = append(c.errors, fmt.Errorf("failed to parse memory block size (%q): %w", string(blockSizeRaw), err))
80 return
81 }
82 dirEntries, err := os.ReadDir("/sys/devices/system/memory")
83 if err != nil {
84 c.errors = append(c.errors, fmt.Errorf("unable to read sysfs memory devices list: %w", err))
85 return
86 }
87 c.node.MemoryInstalledBytes = 0
88 for _, e := range dirEntries {
89 if memoryBlockRegexp.MatchString(e.Name()) {
90 // This is safe as the regexp does not allow for any dots
91 state, err := os.ReadFile("/sys/devices/system/memory/%s/state")
92 if os.IsNotExist(err) {
93 // Memory hotplug operation raced us
94 continue
95 } else if err != nil {
96 c.errors = append(c.errors, fmt.Errorf("failed to read memory block state for %s: %w", e.Name(), err))
97 continue
98 }
99 if strings.TrimSpace(string(state)) != "online" {
100 // Only count online memory
101 continue
102 }
103 // Each block is one blockSize of memory
104 c.node.MemoryInstalledBytes += blockSize
105 }
106 }
Lorenz Brun62948542023-01-10 13:28:44 +0000107}
108
109func parseCpuinfoAMD64(cpuinfoRaw []byte) (*api.CPU, []error) {
110 // Parse line-by-line, each segment is separated by a line with no colon
111 // character, a segment describes a logical processor if it contains
112 // the key "processor". Keep track of all seen core IDs (physical
113 // processors) and processor IDs (logical processors) in a map to fill
114 // into the structure.
115 s := bufio.NewScanner(bytes.NewReader(cpuinfoRaw))
116 var cpu api.CPU
117 scannedVals := make(map[string]string)
118 seenCoreIDs := make(map[string]bool)
119 seenProcessorIDs := make(map[string]bool)
120 processItem := func() error {
121 if _, ok := scannedVals["processor"]; !ok {
122 // Not a cpu, clear data and return
123 scannedVals = make(map[string]string)
124 return nil
125 }
126 seenProcessorIDs[scannedVals["processor"]] = true
127 seenCoreIDs[scannedVals["core id"]] = true
128 cpu.Model = scannedVals["model name"]
129 cpu.Vendor = scannedVals["vendor_id"]
130 family, err := strconv.Atoi(scannedVals["cpu family"])
131 if err != nil {
Tim Windelschmidt327cdba2024-05-21 13:51:32 +0200132 return fmt.Errorf("unable to parse CPU family to int: %w", err)
Lorenz Brun62948542023-01-10 13:28:44 +0000133 }
134 model, err := strconv.Atoi(scannedVals["model"])
135 if err != nil {
Tim Windelschmidt327cdba2024-05-21 13:51:32 +0200136 return fmt.Errorf("unable to parse CPU model to int: %w", err)
Lorenz Brun62948542023-01-10 13:28:44 +0000137 }
138 stepping, err := strconv.Atoi(scannedVals["stepping"])
139 if err != nil {
Tim Windelschmidt327cdba2024-05-21 13:51:32 +0200140 return fmt.Errorf("unable to parse CPU stepping to int: %w", err)
Lorenz Brun62948542023-01-10 13:28:44 +0000141 }
142 cpu.Architecture = &api.CPU_X86_64_{
143 X86_64: &api.CPU_X86_64{
144 Family: int32(family),
145 Model: int32(model),
146 Stepping: int32(stepping),
147 },
148 }
149 scannedVals = make(map[string]string)
150 return nil
151 }
152 var errs []error
153 for s.Scan() {
154 k, v, ok := strings.Cut(s.Text(), ":")
155 // If there is a colon, add property to scannedVals.
156 if ok {
157 scannedVals[strings.TrimSpace(k)] = strings.TrimSpace(v)
158 continue
159 }
160 // Otherwise this is a segment boundary, process the segment.
161 if err := processItem(); err != nil {
162 errs = append(errs, fmt.Errorf("error parsing cpuinfo block: %w", err))
163 }
164 }
165 // Parse the last segment.
166 if err := processItem(); err != nil {
167 errs = append(errs, fmt.Errorf("error parsing cpuinfo block: %w", err))
168 }
169 cpu.Cores = int32(len(seenCoreIDs))
170 cpu.HardwareThreads = int32(len(seenProcessorIDs))
171 return &cpu, errs
172}
173
174func (c *hwReportContext) gatherCPU() {
175 switch runtime.GOARCH {
176 case "amd64":
177 // Currently a rather simple gatherer with no special NUMA handling
178 cpuinfoRaw, err := os.ReadFile("/proc/cpuinfo")
179 if err != nil {
180 c.errors = append(c.errors, fmt.Errorf("unable to read cpuinfo: %w", err))
181 return
182 }
183 cpu, errs := parseCpuinfoAMD64(cpuinfoRaw)
184 c.errors = append(c.errors, errs...)
185 c.node.Cpu = append(c.node.Cpu, cpu)
186 default:
187 // Currently unimplemented, do nothing
188 c.errors = append(c.errors, fmt.Errorf("architecture %v unsupported by CPU gatherer", runtime.GOARCH))
189 }
Lorenz Brun62948542023-01-10 13:28:44 +0000190}
191
// FRUUnavailable is the all-zero 16-byte value. gatherNVMe compares the FRU
// globally unique identifier against it and only reports a WWN if they
// differ.
var FRUUnavailable [16]byte
Lorenz Brun62948542023-01-10 13:28:44 +0000193
194func (c *hwReportContext) gatherNVMe(bd *api.BlockDevice, bde os.DirEntry) error {
195 bd.Protocol = api.BlockDevice_NVME
196 nvmeDev, err := nvme.Open("/dev/" + bde.Name())
197 if err != nil {
198 return fmt.Errorf("unable to open NVMe device: %w", err)
199 }
200 defer nvmeDev.Close()
201 identifyData, err := nvmeDev.Identify()
202 if err != nil {
203 return fmt.Errorf("calling Identify failed: %w", err)
204 }
205 bd.DeviceModel = identifyData.ModelNumber
206 bd.SerialNumber = identifyData.SerialNumber
207 if identifyData.FRUGloballyUniqueIdentifier != FRUUnavailable {
208 bd.Wwn = identifyData.FRUGloballyUniqueIdentifier[:]
209 }
210 if healthInfo, err := nvmeDev.GetHealthInfo(); err == nil {
211 bd.AvailableSpareRatio = &healthInfo.AvailableSpare
212 bd.CriticalWarning = healthInfo.HasCriticalWarning()
Lorenz Brunaadeb792023-03-27 15:53:56 +0200213 mediaErrors := int64(healthInfo.MediaAndDataIntegrityErrors)
Lorenz Brun62948542023-01-10 13:28:44 +0000214 bd.MediaErrors = &mediaErrors
215 bd.UsageRatio = &healthInfo.LifeUsed
216 }
217 return nil
218}
219
220func (c *hwReportContext) gatherSCSI(bd *api.BlockDevice, bde os.DirEntry) error {
221 bd.Protocol = api.BlockDevice_SCSI
222 scsiDev, err := scsi.Open("/dev/" + bde.Name())
223 if err != nil {
224 return fmt.Errorf("unable to open SCSI device: %w", err)
225 }
226 defer scsiDev.Close()
227 inquiryData, err := scsiDev.Inquiry()
228 if err != nil {
229 return fmt.Errorf("failed calling INQUIRY: %w", err)
230 }
231 if serial, err := scsiDev.UnitSerialNumber(); err == nil {
232 bd.SerialNumber = serial
233 }
234
235 // SAT-5 R8 Table 14
236 if inquiryData.Vendor == "ATA" { // ATA device behind SAT
237 bd.Protocol = api.BlockDevice_ATA
238 // TODO: ATA Vendor from WWN if available
239 } else { // Normal SCSI device
240 bd.Vendor = inquiryData.Vendor
241 // Attempt to read defect list to populate media error count
242 var mediaErrors int64
243 if defectsLBA, err := scsiDev.ReadDefectDataLBA(false, true); err == nil {
244 mediaErrors = int64(len(defectsLBA))
245 bd.MediaErrors = &mediaErrors
246 } else if defectsPhysical, err := scsiDev.ReadDefectDataPhysical(false, true); err == nil {
247 mediaErrors = int64(len(defectsPhysical))
248 bd.MediaErrors = &mediaErrors
249 }
250 if mediaHealth, err := scsiDev.SolidStateMediaHealth(); err == nil {
251 used := float32(mediaHealth.PercentageUsedEnduranceIndicator) / 100.
252 bd.UsageRatio = &used
253 }
254 if informationalExceptions, err := scsiDev.GetInformationalExceptions(); err == nil {
255 // Only consider FailurePredictionThresholdExceeded-class sense codes critical.
256 // The second commonly reported error here according to random forums are
257 // Warning-class errors, but looking through these they don't indicate imminent
258 // or even permanent errors.
259 bd.CriticalWarning = informationalExceptions.InformationalSenseCode.IsKey(scsi.FailurePredictionThresholdExceeded)
260 }
261 // SCSI has no reporting of available spares, so this will never be populated
262 }
263 bd.DeviceModel = inquiryData.Product
264 return nil
265}
266
267func (c *hwReportContext) gatherBlockDevices() {
268 blockDeviceEntries, err := os.ReadDir("/sys/class/block")
269 if err != nil {
270 c.errors = append(c.errors, fmt.Errorf("unable to read sysfs block device list: %w", err))
271 return
272 }
273 for _, bde := range blockDeviceEntries {
274 sysfsDir := fmt.Sprintf("/sys/class/block/%s", bde.Name())
275 if _, err := os.Stat(sysfsDir + "/partition"); err == nil {
276 // Ignore partitions, we only care about their parents
277 continue
278 }
279 var bd api.BlockDevice
280 if rotational, err := os.ReadFile(sysfsDir + "/queue/rotational"); err == nil {
281 if strings.TrimSpace(string(rotational)) == "1" {
282 bd.Rotational = true
283 }
284 }
285 if sizeRaw, err := os.ReadFile(sysfsDir + "/size"); err == nil {
286 size, err := strconv.ParseInt(strings.TrimSpace(string(sizeRaw)), 10, 64)
287 if err != nil {
288 c.errors = append(c.errors, fmt.Errorf("unable to parse block device %v size: %w", bde.Name(), err))
289 } else {
290 // Linux always defines size in terms of 512 byte blocks regardless
291 // of what the configured logical and physical block sizes are.
292 bd.CapacityBytes = size * 512
293 }
294 }
295 if lbsRaw, err := os.ReadFile(sysfsDir + "/queue/logical_block_size"); err == nil {
296 lbs, err := strconv.ParseInt(strings.TrimSpace(string(lbsRaw)), 10, 32)
297 if err != nil {
298 c.errors = append(c.errors, fmt.Errorf("unable to parse block device %v logical block size: %w", bde.Name(), err))
299 } else {
300 bd.LogicalBlockSizeBytes = int32(lbs)
301 }
302 }
303 if pbsRaw, err := os.ReadFile(sysfsDir + "/queue/physical_block_size"); err == nil {
304 pbs, err := strconv.ParseInt(strings.TrimSpace(string(pbsRaw)), 10, 32)
305 if err != nil {
306 c.errors = append(c.errors, fmt.Errorf("unable to parse physical block size: %w", err))
307 } else {
308 bd.PhysicalBlockSizeBytes = int32(pbs)
309 }
310 }
311 if strings.HasPrefix(bde.Name(), "nvme") {
312 err := c.gatherNVMe(&bd, bde)
313 if err != nil {
314 c.errors = append(c.errors, fmt.Errorf("block device %v: %w", bde.Name(), err))
315 } else {
316 c.node.BlockDevice = append(c.node.BlockDevice, &bd)
317 }
318 }
319 if strings.HasPrefix(bde.Name(), "sd") {
320 err := c.gatherSCSI(&bd, bde)
321 if err != nil {
322 c.errors = append(c.errors, fmt.Errorf("block device %v: %w", bde.Name(), err))
323 } else {
324 c.node.BlockDevice = append(c.node.BlockDevice, &bd)
325 }
326 }
327 if strings.HasPrefix(bde.Name(), "mmcblk") {
328 // TODO: MMC information
329 bd.Protocol = api.BlockDevice_MMC
330 c.node.BlockDevice = append(c.node.BlockDevice, &bd)
331 }
332 }
Lorenz Brun62948542023-01-10 13:28:44 +0000333}
334
// speedModeRegexp captures the decimal number at the start of a link mode
// name which is directly followed by "base". gatherNICs parses the captured
// number as a speed in megabits.
var speedModeRegexp = regexp.MustCompile("^([0-9]+)base")
336
// mbps is the number of bytes per second which corresponds to one megabit
// per second. Multiplying a speed in megabits by it yields bytes per second.
const mbps = 1000 * 1000 / 8
338
339func (c *hwReportContext) gatherNICs() {
340 links, err := netlink.LinkList()
341 if err != nil {
342 c.errors = append(c.errors, fmt.Errorf("failed to list network links: %w", err))
343 return
344 }
345 ethClient, err := ethtool.New()
346 if err != nil {
347 c.errors = append(c.errors, fmt.Errorf("failed to get ethtool netlink client: %w", err))
348 return
349 }
350 defer ethClient.Close()
351 for _, l := range links {
352 if l.Type() != "device" || len(l.Attrs().HardwareAddr) == 0 {
353 // Not a physical device, ignore
354 continue
355 }
356 var nif api.NetworkInterface
357 nif.Mac = l.Attrs().HardwareAddr
358 mode, err := ethClient.LinkMode(ethtool.Interface{Index: l.Attrs().Index})
359 if err == nil {
360 if mode.SpeedMegabits < math.MaxInt32 {
361 nif.CurrentSpeedBytes = int64(mode.SpeedMegabits) * mbps
362 }
363 speeds := make(map[int64]bool)
364 for _, m := range mode.Ours {
365 // Doing this with a regexp is arguably more future-proof as
366 // we don't need to add each link mode for the detection to
367 // work.
368 modeParts := speedModeRegexp.FindStringSubmatch(m.Name)
369 if len(modeParts) > 0 {
370 speedMegabits, err := strconv.ParseInt(modeParts[1], 10, 64)
371 if err != nil {
372 c.errors = append(c.errors, fmt.Errorf("nic %v: failed to parse %q as integer: %w", l.Attrs().Name, modeParts[1], err))
373 continue
374 }
Tim Windelschmidt55381212024-04-17 02:42:31 +0200375 speeds[speedMegabits*mbps] = true
Lorenz Brun62948542023-01-10 13:28:44 +0000376 }
377 }
378 for s := range speeds {
379 nif.SupportedSpeedBytes = append(nif.SupportedSpeedBytes, s)
380 }
381 // Go randomizes the map keys, sort to make the report stable.
382 sort.Slice(nif.SupportedSpeedBytes, func(i, j int) bool { return nif.SupportedSpeedBytes[i] > nif.SupportedSpeedBytes[j] })
383 }
384 state, err := ethClient.LinkState(ethtool.Interface{Index: l.Attrs().Index})
385 if err == nil {
386 nif.LinkUp = state.Link
387 } else {
388 // We have no ethtool support, fall back to checking if Linux
389 // thinks the link is up.
390 nif.LinkUp = l.Attrs().OperState == netlink.OperUp
391 }
392 // Linux blocks creation of interfaces which conflict with special path
393 // characters, so this path assembly is fine.
394 driverPath, err := os.Readlink("/sys/class/net/" + l.Attrs().Name + "/device/driver")
395 if err == nil {
396 nif.Driver = filepath.Base(driverPath)
397 }
398 c.node.NetworkInterface = append(c.node.NetworkInterface, &nif)
399 }
Lorenz Brun62948542023-01-10 13:28:44 +0000400}
401
402func gatherHWReport() (*api.Node, []error) {
Lorenz Brunaadeb792023-03-27 15:53:56 +0200403 hwReportCtx := hwReportContext{
404 node: &api.Node{},
405 }
Lorenz Brun1cd26962023-04-19 16:10:17 +0200406 hwReportCtx.node.EfiSupport = api.EFISupport_EFI_UNKNOWN
Lorenz Brun62948542023-01-10 13:28:44 +0000407
408 hwReportCtx.gatherCPU()
409 hwReportCtx.gatherSMBIOS()
410 if hwReportCtx.node.MemoryInstalledBytes == 0 {
411 hwReportCtx.gatherMemorySysfs()
412 }
413 var sysinfo unix.Sysinfo_t
414 if err := unix.Sysinfo(&sysinfo); err != nil {
415 hwReportCtx.errors = append(hwReportCtx.errors, fmt.Errorf("unable to execute sysinfo syscall: %w", err))
416 } else {
417 hwReportCtx.node.MemoryUsableRatio = float32(sysinfo.Totalram) / float32(hwReportCtx.node.MemoryInstalledBytes)
418 }
419 hwReportCtx.gatherNICs()
420 hwReportCtx.gatherBlockDevices()
421
Lorenz Brun1cd26962023-04-19 16:10:17 +0200422 if _, err := os.Stat("/sys/firmware/efi/runtime"); err == nil {
423 hwReportCtx.node.EfiSupport = api.EFISupport_EFI_ENABLED
424 }
425
Lorenz Brun62948542023-01-10 13:28:44 +0000426 return hwReportCtx.node, hwReportCtx.errors
427}