blob: 5be56aec5d5919d55b89de6e2615e4be1360fd65 [file] [log] [blame]
Tim Windelschmidt6d33a432025-02-04 14:34:25 +01001// Copyright The Monogon Project Authors.
2// SPDX-License-Identifier: Apache-2.0
3
Lorenz Brun62948542023-01-10 13:28:44 +00004package main
5
6import (
7 "bufio"
8 "bytes"
9 "fmt"
10 "math"
11 "os"
12 "path/filepath"
13 "regexp"
14 "runtime"
15 "sort"
16 "strconv"
17 "strings"
18
19 "github.com/mdlayher/ethtool"
20 "github.com/vishvananda/netlink"
21 "golang.org/x/sys/unix"
22
23 "source.monogon.dev/cloud/agent/api"
Tim Windelschmidt9f21f532024-05-07 15:14:20 +020024 "source.monogon.dev/osbase/nvme"
25 "source.monogon.dev/osbase/scsi"
26 "source.monogon.dev/osbase/smbios"
Lorenz Brun62948542023-01-10 13:28:44 +000027)
28
29type hwReportContext struct {
30 node *api.Node
31 errors []error
32}
33
34func (c *hwReportContext) gatherSMBIOS() {
35 smbiosFile, err := os.Open("/sys/firmware/dmi/tables/DMI")
36 if err != nil {
37 c.errors = append(c.errors, fmt.Errorf("unable to open SMBIOS table: %w", err))
38 return
39 }
40 defer smbiosFile.Close()
41 smbTbl, err := smbios.Unmarshal(bufio.NewReader(smbiosFile))
42 if err != nil {
43 c.errors = append(c.errors, fmt.Errorf("unable to parse SMBIOS table: %w", err))
44 return
45 }
46 if smbTbl.SystemInformationRaw != nil {
47 c.node.Manufacturer = smbTbl.SystemInformationRaw.Manufacturer
48 c.node.Product = smbTbl.SystemInformationRaw.ProductName
49 c.node.SerialNumber = smbTbl.SystemInformationRaw.SerialNumber
50 }
Lorenz Brun1cd26962023-04-19 16:10:17 +020051 if smbTbl.BIOSInformationRaw != nil && smbTbl.BIOSInformationRaw.StructureVersion.AtLeast(2, 2) {
52 uefiSupport := smbTbl.BIOSInformationRaw.BIOSCharacteristicsExtensionByte2&smbios.UEFISpecificationSupported != 0
53 if uefiSupport {
Tim Windelschmidta10d0cb2025-01-13 14:44:15 +010054 c.node.EfiSupport = api.EFISupport_EFI_SUPPORT_SUPPORTED
Lorenz Brun1cd26962023-04-19 16:10:17 +020055 } else {
Tim Windelschmidta10d0cb2025-01-13 14:44:15 +010056 c.node.EfiSupport = api.EFISupport_EFI_SUPPORT_UNSUPPORTED
Lorenz Brun1cd26962023-04-19 16:10:17 +020057 }
58 }
Lorenz Brun62948542023-01-10 13:28:44 +000059 for _, d := range smbTbl.MemoryDevicesRaw {
60 if d.StructureVersion.AtLeast(3, 2) && d.MemoryTechnology != 0x03 {
61 // If MemoryTechnology is available, only count DRAM
62 continue
63 }
64 size, ok := d.SizeBytes()
65 if !ok {
66 continue
67 }
68 c.node.MemoryInstalledBytes += int64(size)
69 }
Lorenz Brun62948542023-01-10 13:28:44 +000070}
71
// memoryBlockRegexp matches directory names of the form "memory<digits>". It
// is used to pick the per-block entries out of /sys/devices/system/memory.
var memoryBlockRegexp = regexp.MustCompile(`^memory[0-9]+$`)
73
74func (c *hwReportContext) gatherMemorySysfs() {
75 blockSizeRaw, err := os.ReadFile("/sys/devices/system/memory/block_size_bytes")
76 if err != nil {
77 c.errors = append(c.errors, fmt.Errorf("unable to read memory block size, CONFIG_MEMORY_HOTPLUG disabled or sandbox?: %w", err))
78 return
79 }
80 blockSize, err := strconv.ParseInt(strings.TrimSpace(string(blockSizeRaw)), 16, 64)
81 if err != nil {
82 c.errors = append(c.errors, fmt.Errorf("failed to parse memory block size (%q): %w", string(blockSizeRaw), err))
83 return
84 }
85 dirEntries, err := os.ReadDir("/sys/devices/system/memory")
86 if err != nil {
87 c.errors = append(c.errors, fmt.Errorf("unable to read sysfs memory devices list: %w", err))
88 return
89 }
90 c.node.MemoryInstalledBytes = 0
91 for _, e := range dirEntries {
92 if memoryBlockRegexp.MatchString(e.Name()) {
93 // This is safe as the regexp does not allow for any dots
94 state, err := os.ReadFile("/sys/devices/system/memory/%s/state")
95 if os.IsNotExist(err) {
96 // Memory hotplug operation raced us
97 continue
98 } else if err != nil {
99 c.errors = append(c.errors, fmt.Errorf("failed to read memory block state for %s: %w", e.Name(), err))
100 continue
101 }
102 if strings.TrimSpace(string(state)) != "online" {
103 // Only count online memory
104 continue
105 }
106 // Each block is one blockSize of memory
107 c.node.MemoryInstalledBytes += blockSize
108 }
109 }
Lorenz Brun62948542023-01-10 13:28:44 +0000110}
111
112func parseCpuinfoAMD64(cpuinfoRaw []byte) (*api.CPU, []error) {
113 // Parse line-by-line, each segment is separated by a line with no colon
114 // character, a segment describes a logical processor if it contains
115 // the key "processor". Keep track of all seen core IDs (physical
116 // processors) and processor IDs (logical processors) in a map to fill
117 // into the structure.
118 s := bufio.NewScanner(bytes.NewReader(cpuinfoRaw))
119 var cpu api.CPU
120 scannedVals := make(map[string]string)
121 seenCoreIDs := make(map[string]bool)
122 seenProcessorIDs := make(map[string]bool)
123 processItem := func() error {
124 if _, ok := scannedVals["processor"]; !ok {
125 // Not a cpu, clear data and return
126 scannedVals = make(map[string]string)
127 return nil
128 }
129 seenProcessorIDs[scannedVals["processor"]] = true
130 seenCoreIDs[scannedVals["core id"]] = true
131 cpu.Model = scannedVals["model name"]
132 cpu.Vendor = scannedVals["vendor_id"]
133 family, err := strconv.Atoi(scannedVals["cpu family"])
134 if err != nil {
Tim Windelschmidt327cdba2024-05-21 13:51:32 +0200135 return fmt.Errorf("unable to parse CPU family to int: %w", err)
Lorenz Brun62948542023-01-10 13:28:44 +0000136 }
137 model, err := strconv.Atoi(scannedVals["model"])
138 if err != nil {
Tim Windelschmidt327cdba2024-05-21 13:51:32 +0200139 return fmt.Errorf("unable to parse CPU model to int: %w", err)
Lorenz Brun62948542023-01-10 13:28:44 +0000140 }
141 stepping, err := strconv.Atoi(scannedVals["stepping"])
142 if err != nil {
Tim Windelschmidt327cdba2024-05-21 13:51:32 +0200143 return fmt.Errorf("unable to parse CPU stepping to int: %w", err)
Lorenz Brun62948542023-01-10 13:28:44 +0000144 }
145 cpu.Architecture = &api.CPU_X86_64_{
146 X86_64: &api.CPU_X86_64{
147 Family: int32(family),
148 Model: int32(model),
149 Stepping: int32(stepping),
150 },
151 }
152 scannedVals = make(map[string]string)
153 return nil
154 }
155 var errs []error
156 for s.Scan() {
157 k, v, ok := strings.Cut(s.Text(), ":")
158 // If there is a colon, add property to scannedVals.
159 if ok {
160 scannedVals[strings.TrimSpace(k)] = strings.TrimSpace(v)
161 continue
162 }
163 // Otherwise this is a segment boundary, process the segment.
164 if err := processItem(); err != nil {
165 errs = append(errs, fmt.Errorf("error parsing cpuinfo block: %w", err))
166 }
167 }
168 // Parse the last segment.
169 if err := processItem(); err != nil {
170 errs = append(errs, fmt.Errorf("error parsing cpuinfo block: %w", err))
171 }
172 cpu.Cores = int32(len(seenCoreIDs))
173 cpu.HardwareThreads = int32(len(seenProcessorIDs))
174 return &cpu, errs
175}
176
177func (c *hwReportContext) gatherCPU() {
178 switch runtime.GOARCH {
179 case "amd64":
180 // Currently a rather simple gatherer with no special NUMA handling
181 cpuinfoRaw, err := os.ReadFile("/proc/cpuinfo")
182 if err != nil {
183 c.errors = append(c.errors, fmt.Errorf("unable to read cpuinfo: %w", err))
184 return
185 }
186 cpu, errs := parseCpuinfoAMD64(cpuinfoRaw)
187 c.errors = append(c.errors, errs...)
188 c.node.Cpu = append(c.node.Cpu, cpu)
189 default:
190 // Currently unimplemented, do nothing
191 c.errors = append(c.errors, fmt.Errorf("architecture %v unsupported by CPU gatherer", runtime.GOARCH))
192 }
Lorenz Brun62948542023-01-10 13:28:44 +0000193}
194
// FRUUnavailable is the all-zero 16-byte value. An NVMe FRU globally unique
// identifier equal to it is treated as not available.
var FRUUnavailable = [16]byte{}
Lorenz Brun62948542023-01-10 13:28:44 +0000196
197func (c *hwReportContext) gatherNVMe(bd *api.BlockDevice, bde os.DirEntry) error {
Tim Windelschmidta10d0cb2025-01-13 14:44:15 +0100198 bd.Protocol = api.BlockDevice_PROTOCOL_NVME
Lorenz Brun62948542023-01-10 13:28:44 +0000199 nvmeDev, err := nvme.Open("/dev/" + bde.Name())
200 if err != nil {
201 return fmt.Errorf("unable to open NVMe device: %w", err)
202 }
203 defer nvmeDev.Close()
204 identifyData, err := nvmeDev.Identify()
205 if err != nil {
206 return fmt.Errorf("calling Identify failed: %w", err)
207 }
208 bd.DeviceModel = identifyData.ModelNumber
209 bd.SerialNumber = identifyData.SerialNumber
210 if identifyData.FRUGloballyUniqueIdentifier != FRUUnavailable {
211 bd.Wwn = identifyData.FRUGloballyUniqueIdentifier[:]
212 }
213 if healthInfo, err := nvmeDev.GetHealthInfo(); err == nil {
214 bd.AvailableSpareRatio = &healthInfo.AvailableSpare
215 bd.CriticalWarning = healthInfo.HasCriticalWarning()
Lorenz Brunaadeb792023-03-27 15:53:56 +0200216 mediaErrors := int64(healthInfo.MediaAndDataIntegrityErrors)
Lorenz Brun62948542023-01-10 13:28:44 +0000217 bd.MediaErrors = &mediaErrors
218 bd.UsageRatio = &healthInfo.LifeUsed
219 }
220 return nil
221}
222
223func (c *hwReportContext) gatherSCSI(bd *api.BlockDevice, bde os.DirEntry) error {
Tim Windelschmidta10d0cb2025-01-13 14:44:15 +0100224 bd.Protocol = api.BlockDevice_PROTOCOL_SCSI
Lorenz Brun62948542023-01-10 13:28:44 +0000225 scsiDev, err := scsi.Open("/dev/" + bde.Name())
226 if err != nil {
227 return fmt.Errorf("unable to open SCSI device: %w", err)
228 }
229 defer scsiDev.Close()
230 inquiryData, err := scsiDev.Inquiry()
231 if err != nil {
232 return fmt.Errorf("failed calling INQUIRY: %w", err)
233 }
234 if serial, err := scsiDev.UnitSerialNumber(); err == nil {
235 bd.SerialNumber = serial
236 }
237
238 // SAT-5 R8 Table 14
239 if inquiryData.Vendor == "ATA" { // ATA device behind SAT
Tim Windelschmidta10d0cb2025-01-13 14:44:15 +0100240 bd.Protocol = api.BlockDevice_PROTOCOL_ATA
Lorenz Brun62948542023-01-10 13:28:44 +0000241 // TODO: ATA Vendor from WWN if available
242 } else { // Normal SCSI device
243 bd.Vendor = inquiryData.Vendor
244 // Attempt to read defect list to populate media error count
245 var mediaErrors int64
246 if defectsLBA, err := scsiDev.ReadDefectDataLBA(false, true); err == nil {
247 mediaErrors = int64(len(defectsLBA))
248 bd.MediaErrors = &mediaErrors
249 } else if defectsPhysical, err := scsiDev.ReadDefectDataPhysical(false, true); err == nil {
250 mediaErrors = int64(len(defectsPhysical))
251 bd.MediaErrors = &mediaErrors
252 }
253 if mediaHealth, err := scsiDev.SolidStateMediaHealth(); err == nil {
254 used := float32(mediaHealth.PercentageUsedEnduranceIndicator) / 100.
255 bd.UsageRatio = &used
256 }
257 if informationalExceptions, err := scsiDev.GetInformationalExceptions(); err == nil {
258 // Only consider FailurePredictionThresholdExceeded-class sense codes critical.
259 // The second commonly reported error here according to random forums are
260 // Warning-class errors, but looking through these they don't indicate imminent
261 // or even permanent errors.
262 bd.CriticalWarning = informationalExceptions.InformationalSenseCode.IsKey(scsi.FailurePredictionThresholdExceeded)
263 }
264 // SCSI has no reporting of available spares, so this will never be populated
265 }
266 bd.DeviceModel = inquiryData.Product
267 return nil
268}
269
270func (c *hwReportContext) gatherBlockDevices() {
271 blockDeviceEntries, err := os.ReadDir("/sys/class/block")
272 if err != nil {
273 c.errors = append(c.errors, fmt.Errorf("unable to read sysfs block device list: %w", err))
274 return
275 }
276 for _, bde := range blockDeviceEntries {
277 sysfsDir := fmt.Sprintf("/sys/class/block/%s", bde.Name())
278 if _, err := os.Stat(sysfsDir + "/partition"); err == nil {
279 // Ignore partitions, we only care about their parents
280 continue
281 }
282 var bd api.BlockDevice
283 if rotational, err := os.ReadFile(sysfsDir + "/queue/rotational"); err == nil {
284 if strings.TrimSpace(string(rotational)) == "1" {
285 bd.Rotational = true
286 }
287 }
288 if sizeRaw, err := os.ReadFile(sysfsDir + "/size"); err == nil {
289 size, err := strconv.ParseInt(strings.TrimSpace(string(sizeRaw)), 10, 64)
290 if err != nil {
291 c.errors = append(c.errors, fmt.Errorf("unable to parse block device %v size: %w", bde.Name(), err))
292 } else {
293 // Linux always defines size in terms of 512 byte blocks regardless
294 // of what the configured logical and physical block sizes are.
295 bd.CapacityBytes = size * 512
296 }
297 }
298 if lbsRaw, err := os.ReadFile(sysfsDir + "/queue/logical_block_size"); err == nil {
299 lbs, err := strconv.ParseInt(strings.TrimSpace(string(lbsRaw)), 10, 32)
300 if err != nil {
301 c.errors = append(c.errors, fmt.Errorf("unable to parse block device %v logical block size: %w", bde.Name(), err))
302 } else {
303 bd.LogicalBlockSizeBytes = int32(lbs)
304 }
305 }
306 if pbsRaw, err := os.ReadFile(sysfsDir + "/queue/physical_block_size"); err == nil {
307 pbs, err := strconv.ParseInt(strings.TrimSpace(string(pbsRaw)), 10, 32)
308 if err != nil {
309 c.errors = append(c.errors, fmt.Errorf("unable to parse physical block size: %w", err))
310 } else {
311 bd.PhysicalBlockSizeBytes = int32(pbs)
312 }
313 }
314 if strings.HasPrefix(bde.Name(), "nvme") {
315 err := c.gatherNVMe(&bd, bde)
316 if err != nil {
317 c.errors = append(c.errors, fmt.Errorf("block device %v: %w", bde.Name(), err))
318 } else {
319 c.node.BlockDevice = append(c.node.BlockDevice, &bd)
320 }
321 }
322 if strings.HasPrefix(bde.Name(), "sd") {
323 err := c.gatherSCSI(&bd, bde)
324 if err != nil {
325 c.errors = append(c.errors, fmt.Errorf("block device %v: %w", bde.Name(), err))
326 } else {
327 c.node.BlockDevice = append(c.node.BlockDevice, &bd)
328 }
329 }
330 if strings.HasPrefix(bde.Name(), "mmcblk") {
331 // TODO: MMC information
Tim Windelschmidta10d0cb2025-01-13 14:44:15 +0100332 bd.Protocol = api.BlockDevice_PROTOCOL_MMC
Lorenz Brun62948542023-01-10 13:28:44 +0000333 c.node.BlockDevice = append(c.node.BlockDevice, &bd)
334 }
335 }
Lorenz Brun62948542023-01-10 13:28:44 +0000336}
337
// speedModeRegexp captures the leading decimal number of a link mode name
// that starts with "<digits>base". The captured number is interpreted as the
// link speed in megabits per second.
var speedModeRegexp = regexp.MustCompile(`^([0-9]+)base`)

// mbps is the number of bytes per second corresponding to one megabit per
// second.
const mbps = 1000 * 1000 / 8
341
342func (c *hwReportContext) gatherNICs() {
343 links, err := netlink.LinkList()
344 if err != nil {
345 c.errors = append(c.errors, fmt.Errorf("failed to list network links: %w", err))
346 return
347 }
348 ethClient, err := ethtool.New()
349 if err != nil {
350 c.errors = append(c.errors, fmt.Errorf("failed to get ethtool netlink client: %w", err))
351 return
352 }
353 defer ethClient.Close()
354 for _, l := range links {
355 if l.Type() != "device" || len(l.Attrs().HardwareAddr) == 0 {
356 // Not a physical device, ignore
357 continue
358 }
359 var nif api.NetworkInterface
360 nif.Mac = l.Attrs().HardwareAddr
361 mode, err := ethClient.LinkMode(ethtool.Interface{Index: l.Attrs().Index})
362 if err == nil {
363 if mode.SpeedMegabits < math.MaxInt32 {
364 nif.CurrentSpeedBytes = int64(mode.SpeedMegabits) * mbps
365 }
366 speeds := make(map[int64]bool)
367 for _, m := range mode.Ours {
368 // Doing this with a regexp is arguably more future-proof as
369 // we don't need to add each link mode for the detection to
370 // work.
371 modeParts := speedModeRegexp.FindStringSubmatch(m.Name)
372 if len(modeParts) > 0 {
373 speedMegabits, err := strconv.ParseInt(modeParts[1], 10, 64)
374 if err != nil {
375 c.errors = append(c.errors, fmt.Errorf("nic %v: failed to parse %q as integer: %w", l.Attrs().Name, modeParts[1], err))
376 continue
377 }
Tim Windelschmidt55381212024-04-17 02:42:31 +0200378 speeds[speedMegabits*mbps] = true
Lorenz Brun62948542023-01-10 13:28:44 +0000379 }
380 }
381 for s := range speeds {
382 nif.SupportedSpeedBytes = append(nif.SupportedSpeedBytes, s)
383 }
384 // Go randomizes the map keys, sort to make the report stable.
385 sort.Slice(nif.SupportedSpeedBytes, func(i, j int) bool { return nif.SupportedSpeedBytes[i] > nif.SupportedSpeedBytes[j] })
386 }
387 state, err := ethClient.LinkState(ethtool.Interface{Index: l.Attrs().Index})
388 if err == nil {
389 nif.LinkUp = state.Link
390 } else {
391 // We have no ethtool support, fall back to checking if Linux
392 // thinks the link is up.
393 nif.LinkUp = l.Attrs().OperState == netlink.OperUp
394 }
395 // Linux blocks creation of interfaces which conflict with special path
396 // characters, so this path assembly is fine.
397 driverPath, err := os.Readlink("/sys/class/net/" + l.Attrs().Name + "/device/driver")
398 if err == nil {
399 nif.Driver = filepath.Base(driverPath)
400 }
401 c.node.NetworkInterface = append(c.node.NetworkInterface, &nif)
402 }
Lorenz Brun62948542023-01-10 13:28:44 +0000403}
404
405func gatherHWReport() (*api.Node, []error) {
Lorenz Brunaadeb792023-03-27 15:53:56 +0200406 hwReportCtx := hwReportContext{
407 node: &api.Node{},
408 }
Tim Windelschmidta10d0cb2025-01-13 14:44:15 +0100409 hwReportCtx.node.EfiSupport = api.EFISupport_EFI_SUPPORT_UNKNOWN
Lorenz Brun62948542023-01-10 13:28:44 +0000410
411 hwReportCtx.gatherCPU()
412 hwReportCtx.gatherSMBIOS()
413 if hwReportCtx.node.MemoryInstalledBytes == 0 {
414 hwReportCtx.gatherMemorySysfs()
415 }
416 var sysinfo unix.Sysinfo_t
417 if err := unix.Sysinfo(&sysinfo); err != nil {
418 hwReportCtx.errors = append(hwReportCtx.errors, fmt.Errorf("unable to execute sysinfo syscall: %w", err))
419 } else {
420 hwReportCtx.node.MemoryUsableRatio = float32(sysinfo.Totalram) / float32(hwReportCtx.node.MemoryInstalledBytes)
421 }
422 hwReportCtx.gatherNICs()
423 hwReportCtx.gatherBlockDevices()
424
Lorenz Brun1cd26962023-04-19 16:10:17 +0200425 if _, err := os.Stat("/sys/firmware/efi/runtime"); err == nil {
Tim Windelschmidta10d0cb2025-01-13 14:44:15 +0100426 hwReportCtx.node.EfiSupport = api.EFISupport_EFI_SUPPORT_ENABLED
Lorenz Brun1cd26962023-04-19 16:10:17 +0200427 }
428
Lorenz Brun62948542023-01-10 13:28:44 +0000429 return hwReportCtx.node, hwReportCtx.errors
430}