|  | package main | 
|  |  | 
|  | import ( | 
|  | "bufio" | 
|  | "bytes" | 
|  | "fmt" | 
|  | "math" | 
|  | "os" | 
|  | "path/filepath" | 
|  | "regexp" | 
|  | "runtime" | 
|  | "sort" | 
|  | "strconv" | 
|  | "strings" | 
|  |  | 
|  | "github.com/mdlayher/ethtool" | 
|  | "github.com/vishvananda/netlink" | 
|  | "golang.org/x/sys/unix" | 
|  |  | 
|  | "source.monogon.dev/cloud/agent/api" | 
|  | "source.monogon.dev/metropolis/pkg/nvme" | 
|  | "source.monogon.dev/metropolis/pkg/scsi" | 
|  | "source.monogon.dev/metropolis/pkg/smbios" | 
|  | ) | 
|  |  | 
|  | type hwReportContext struct { | 
|  | node   *api.Node | 
|  | errors []error | 
|  | } | 
|  |  | 
|  | func (c *hwReportContext) gatherSMBIOS() { | 
|  | smbiosFile, err := os.Open("/sys/firmware/dmi/tables/DMI") | 
|  | if err != nil { | 
|  | c.errors = append(c.errors, fmt.Errorf("unable to open SMBIOS table: %w", err)) | 
|  | return | 
|  | } | 
|  | defer smbiosFile.Close() | 
|  | smbTbl, err := smbios.Unmarshal(bufio.NewReader(smbiosFile)) | 
|  | if err != nil { | 
|  | c.errors = append(c.errors, fmt.Errorf("unable to parse SMBIOS table: %w", err)) | 
|  | return | 
|  | } | 
|  | if smbTbl.SystemInformationRaw != nil { | 
|  | c.node.Manufacturer = smbTbl.SystemInformationRaw.Manufacturer | 
|  | c.node.Product = smbTbl.SystemInformationRaw.ProductName | 
|  | c.node.SerialNumber = smbTbl.SystemInformationRaw.SerialNumber | 
|  | } | 
|  | if smbTbl.BIOSInformationRaw != nil && smbTbl.BIOSInformationRaw.StructureVersion.AtLeast(2, 2) { | 
|  | uefiSupport := smbTbl.BIOSInformationRaw.BIOSCharacteristicsExtensionByte2&smbios.UEFISpecificationSupported != 0 | 
|  | if uefiSupport { | 
|  | c.node.EfiSupport = api.EFISupport_EFI_SUPPORTED | 
|  | } else { | 
|  | c.node.EfiSupport = api.EFISupport_EFI_UNSUPPORTED | 
|  | } | 
|  | } | 
|  | for _, d := range smbTbl.MemoryDevicesRaw { | 
|  | if d.StructureVersion.AtLeast(3, 2) && d.MemoryTechnology != 0x03 { | 
|  | // If MemoryTechnology is available, only count DRAM | 
|  | continue | 
|  | } | 
|  | size, ok := d.SizeBytes() | 
|  | if !ok { | 
|  | continue | 
|  | } | 
|  | c.node.MemoryInstalledBytes += int64(size) | 
|  | } | 
|  | return | 
|  | } | 
|  |  | 
// memoryBlockRegexp matches the names of the per-block entries ("memory"
// followed by one or more decimal digits) in /sys/devices/system/memory.
var memoryBlockRegexp = regexp.MustCompile(`^memory[0-9]+$`)
|  |  | 
|  | func (c *hwReportContext) gatherMemorySysfs() { | 
|  | blockSizeRaw, err := os.ReadFile("/sys/devices/system/memory/block_size_bytes") | 
|  | if err != nil { | 
|  | c.errors = append(c.errors, fmt.Errorf("unable to read memory block size, CONFIG_MEMORY_HOTPLUG disabled or sandbox?: %w", err)) | 
|  | return | 
|  | } | 
|  | blockSize, err := strconv.ParseInt(strings.TrimSpace(string(blockSizeRaw)), 16, 64) | 
|  | if err != nil { | 
|  | c.errors = append(c.errors, fmt.Errorf("failed to parse memory block size (%q): %w", string(blockSizeRaw), err)) | 
|  | return | 
|  | } | 
|  | dirEntries, err := os.ReadDir("/sys/devices/system/memory") | 
|  | if err != nil { | 
|  | c.errors = append(c.errors, fmt.Errorf("unable to read sysfs memory devices list: %w", err)) | 
|  | return | 
|  | } | 
|  | c.node.MemoryInstalledBytes = 0 | 
|  | for _, e := range dirEntries { | 
|  | if memoryBlockRegexp.MatchString(e.Name()) { | 
|  | // This is safe as the regexp does not allow for any dots | 
|  | state, err := os.ReadFile("/sys/devices/system/memory/%s/state") | 
|  | if os.IsNotExist(err) { | 
|  | // Memory hotplug operation raced us | 
|  | continue | 
|  | } else if err != nil { | 
|  | c.errors = append(c.errors, fmt.Errorf("failed to read memory block state for %s: %w", e.Name(), err)) | 
|  | continue | 
|  | } | 
|  | if strings.TrimSpace(string(state)) != "online" { | 
|  | // Only count online memory | 
|  | continue | 
|  | } | 
|  | // Each block is one blockSize of memory | 
|  | c.node.MemoryInstalledBytes += blockSize | 
|  | } | 
|  | } | 
|  | return | 
|  | } | 
|  |  | 
|  | func parseCpuinfoAMD64(cpuinfoRaw []byte) (*api.CPU, []error) { | 
|  | // Parse line-by-line, each segment is separated by a line with no colon | 
|  | // character, a  segment describes a logical processor if it contains | 
|  | // the key "processor". Keep track of all seen core IDs (physical | 
|  | // processors) and processor IDs (logical processors) in a map to fill | 
|  | // into the structure. | 
|  | s := bufio.NewScanner(bytes.NewReader(cpuinfoRaw)) | 
|  | var cpu api.CPU | 
|  | scannedVals := make(map[string]string) | 
|  | seenCoreIDs := make(map[string]bool) | 
|  | seenProcessorIDs := make(map[string]bool) | 
|  | processItem := func() error { | 
|  | if _, ok := scannedVals["processor"]; !ok { | 
|  | // Not a cpu, clear data and return | 
|  | scannedVals = make(map[string]string) | 
|  | return nil | 
|  | } | 
|  | seenProcessorIDs[scannedVals["processor"]] = true | 
|  | seenCoreIDs[scannedVals["core id"]] = true | 
|  | cpu.Model = scannedVals["model name"] | 
|  | cpu.Vendor = scannedVals["vendor_id"] | 
|  | family, err := strconv.Atoi(scannedVals["cpu family"]) | 
|  | if err != nil { | 
|  | return fmt.Errorf("unable to parse CPU family to int: %v", err) | 
|  | } | 
|  | model, err := strconv.Atoi(scannedVals["model"]) | 
|  | if err != nil { | 
|  | return fmt.Errorf("unable to parse CPU model to int: %v", err) | 
|  | } | 
|  | stepping, err := strconv.Atoi(scannedVals["stepping"]) | 
|  | if err != nil { | 
|  | return fmt.Errorf("unable to parse CPU stepping to int: %v", err) | 
|  | } | 
|  | cpu.Architecture = &api.CPU_X86_64_{ | 
|  | X86_64: &api.CPU_X86_64{ | 
|  | Family:   int32(family), | 
|  | Model:    int32(model), | 
|  | Stepping: int32(stepping), | 
|  | }, | 
|  | } | 
|  | scannedVals = make(map[string]string) | 
|  | return nil | 
|  | } | 
|  | var errs []error | 
|  | for s.Scan() { | 
|  | k, v, ok := strings.Cut(s.Text(), ":") | 
|  | // If there is a colon, add property to scannedVals. | 
|  | if ok { | 
|  | scannedVals[strings.TrimSpace(k)] = strings.TrimSpace(v) | 
|  | continue | 
|  | } | 
|  | // Otherwise this is a segment boundary, process the segment. | 
|  | if err := processItem(); err != nil { | 
|  | errs = append(errs, fmt.Errorf("error parsing cpuinfo block: %w", err)) | 
|  | } | 
|  | } | 
|  | // Parse the last segment. | 
|  | if err := processItem(); err != nil { | 
|  | errs = append(errs, fmt.Errorf("error parsing cpuinfo block: %w", err)) | 
|  | } | 
|  | cpu.Cores = int32(len(seenCoreIDs)) | 
|  | cpu.HardwareThreads = int32(len(seenProcessorIDs)) | 
|  | return &cpu, errs | 
|  | } | 
|  |  | 
|  | func (c *hwReportContext) gatherCPU() { | 
|  | switch runtime.GOARCH { | 
|  | case "amd64": | 
|  | // Currently a rather simple gatherer with no special NUMA handling | 
|  | cpuinfoRaw, err := os.ReadFile("/proc/cpuinfo") | 
|  | if err != nil { | 
|  | c.errors = append(c.errors, fmt.Errorf("unable to read cpuinfo: %w", err)) | 
|  | return | 
|  | } | 
|  | cpu, errs := parseCpuinfoAMD64(cpuinfoRaw) | 
|  | c.errors = append(c.errors, errs...) | 
|  | c.node.Cpu = append(c.node.Cpu, cpu) | 
|  | default: | 
|  | // Currently unimplemented, do nothing | 
|  | c.errors = append(c.errors, fmt.Errorf("architecture %v unsupported by CPU gatherer", runtime.GOARCH)) | 
|  | } | 
|  | return | 
|  | } | 
|  |  | 
// FRUUnavailable is the all-zero 16-byte value. gatherNVMe compares the FRU
// globally unique identifier against it and only reports a WWN when the
// identifier differs.
var FRUUnavailable [16]byte
|  |  | 
|  | func (c *hwReportContext) gatherNVMe(bd *api.BlockDevice, bde os.DirEntry) error { | 
|  | bd.Protocol = api.BlockDevice_NVME | 
|  | nvmeDev, err := nvme.Open("/dev/" + bde.Name()) | 
|  | if err != nil { | 
|  | return fmt.Errorf("unable to open NVMe device: %w", err) | 
|  | } | 
|  | defer nvmeDev.Close() | 
|  | identifyData, err := nvmeDev.Identify() | 
|  | if err != nil { | 
|  | return fmt.Errorf("calling Identify failed: %w", err) | 
|  | } | 
|  | bd.DeviceModel = identifyData.ModelNumber | 
|  | bd.SerialNumber = identifyData.SerialNumber | 
|  | if identifyData.FRUGloballyUniqueIdentifier != FRUUnavailable { | 
|  | bd.Wwn = identifyData.FRUGloballyUniqueIdentifier[:] | 
|  | } | 
|  | if healthInfo, err := nvmeDev.GetHealthInfo(); err == nil { | 
|  | bd.AvailableSpareRatio = &healthInfo.AvailableSpare | 
|  | bd.CriticalWarning = healthInfo.HasCriticalWarning() | 
|  | mediaErrors := int64(healthInfo.MediaAndDataIntegrityErrors) | 
|  | bd.MediaErrors = &mediaErrors | 
|  | bd.UsageRatio = &healthInfo.LifeUsed | 
|  | } | 
|  | return nil | 
|  | } | 
|  |  | 
|  | func (c *hwReportContext) gatherSCSI(bd *api.BlockDevice, bde os.DirEntry) error { | 
|  | bd.Protocol = api.BlockDevice_SCSI | 
|  | scsiDev, err := scsi.Open("/dev/" + bde.Name()) | 
|  | if err != nil { | 
|  | return fmt.Errorf("unable to open SCSI device: %w", err) | 
|  | } | 
|  | defer scsiDev.Close() | 
|  | inquiryData, err := scsiDev.Inquiry() | 
|  | if err != nil { | 
|  | return fmt.Errorf("failed calling INQUIRY: %w", err) | 
|  | } | 
|  | if serial, err := scsiDev.UnitSerialNumber(); err == nil { | 
|  | bd.SerialNumber = serial | 
|  | } | 
|  |  | 
|  | // SAT-5 R8 Table 14 | 
|  | if inquiryData.Vendor == "ATA" { // ATA device behind SAT | 
|  | bd.Protocol = api.BlockDevice_ATA | 
|  | // TODO: ATA Vendor from WWN if available | 
|  | } else { // Normal SCSI device | 
|  | bd.Vendor = inquiryData.Vendor | 
|  | // Attempt to read defect list to populate media error count | 
|  | var mediaErrors int64 | 
|  | if defectsLBA, err := scsiDev.ReadDefectDataLBA(false, true); err == nil { | 
|  | mediaErrors = int64(len(defectsLBA)) | 
|  | bd.MediaErrors = &mediaErrors | 
|  | } else if defectsPhysical, err := scsiDev.ReadDefectDataPhysical(false, true); err == nil { | 
|  | mediaErrors = int64(len(defectsPhysical)) | 
|  | bd.MediaErrors = &mediaErrors | 
|  | } | 
|  | if mediaHealth, err := scsiDev.SolidStateMediaHealth(); err == nil { | 
|  | used := float32(mediaHealth.PercentageUsedEnduranceIndicator) / 100. | 
|  | bd.UsageRatio = &used | 
|  | } | 
|  | if informationalExceptions, err := scsiDev.GetInformationalExceptions(); err == nil { | 
|  | // Only consider FailurePredictionThresholdExceeded-class sense codes critical. | 
|  | // The second commonly reported error here according to random forums are | 
|  | // Warning-class errors, but looking through these they don't indicate imminent | 
|  | // or even permanent errors. | 
|  | bd.CriticalWarning = informationalExceptions.InformationalSenseCode.IsKey(scsi.FailurePredictionThresholdExceeded) | 
|  | } | 
|  | // SCSI has no reporting of available spares, so this will never be populated | 
|  | } | 
|  | bd.DeviceModel = inquiryData.Product | 
|  | return nil | 
|  | } | 
|  |  | 
|  | func (c *hwReportContext) gatherBlockDevices() { | 
|  | blockDeviceEntries, err := os.ReadDir("/sys/class/block") | 
|  | if err != nil { | 
|  | c.errors = append(c.errors, fmt.Errorf("unable to read sysfs block device list: %w", err)) | 
|  | return | 
|  | } | 
|  | for _, bde := range blockDeviceEntries { | 
|  | sysfsDir := fmt.Sprintf("/sys/class/block/%s", bde.Name()) | 
|  | if _, err := os.Stat(sysfsDir + "/partition"); err == nil { | 
|  | // Ignore partitions, we only care about their parents | 
|  | continue | 
|  | } | 
|  | var bd api.BlockDevice | 
|  | if rotational, err := os.ReadFile(sysfsDir + "/queue/rotational"); err == nil { | 
|  | if strings.TrimSpace(string(rotational)) == "1" { | 
|  | bd.Rotational = true | 
|  | } | 
|  | } | 
|  | if sizeRaw, err := os.ReadFile(sysfsDir + "/size"); err == nil { | 
|  | size, err := strconv.ParseInt(strings.TrimSpace(string(sizeRaw)), 10, 64) | 
|  | if err != nil { | 
|  | c.errors = append(c.errors, fmt.Errorf("unable to parse block device %v size: %w", bde.Name(), err)) | 
|  | } else { | 
|  | // Linux always defines size in terms of 512 byte blocks regardless | 
|  | // of what the configured logical and physical block sizes are. | 
|  | bd.CapacityBytes = size * 512 | 
|  | } | 
|  | } | 
|  | if lbsRaw, err := os.ReadFile(sysfsDir + "/queue/logical_block_size"); err == nil { | 
|  | lbs, err := strconv.ParseInt(strings.TrimSpace(string(lbsRaw)), 10, 32) | 
|  | if err != nil { | 
|  | c.errors = append(c.errors, fmt.Errorf("unable to parse block device %v logical block size: %w", bde.Name(), err)) | 
|  | } else { | 
|  | bd.LogicalBlockSizeBytes = int32(lbs) | 
|  | } | 
|  | } | 
|  | if pbsRaw, err := os.ReadFile(sysfsDir + "/queue/physical_block_size"); err == nil { | 
|  | pbs, err := strconv.ParseInt(strings.TrimSpace(string(pbsRaw)), 10, 32) | 
|  | if err != nil { | 
|  | c.errors = append(c.errors, fmt.Errorf("unable to parse physical block size: %w", err)) | 
|  | } else { | 
|  | bd.PhysicalBlockSizeBytes = int32(pbs) | 
|  | } | 
|  | } | 
|  | if strings.HasPrefix(bde.Name(), "nvme") { | 
|  | err := c.gatherNVMe(&bd, bde) | 
|  | if err != nil { | 
|  | c.errors = append(c.errors, fmt.Errorf("block device %v: %w", bde.Name(), err)) | 
|  | } else { | 
|  | c.node.BlockDevice = append(c.node.BlockDevice, &bd) | 
|  | } | 
|  | } | 
|  | if strings.HasPrefix(bde.Name(), "sd") { | 
|  | err := c.gatherSCSI(&bd, bde) | 
|  | if err != nil { | 
|  | c.errors = append(c.errors, fmt.Errorf("block device %v: %w", bde.Name(), err)) | 
|  | } else { | 
|  | c.node.BlockDevice = append(c.node.BlockDevice, &bd) | 
|  | } | 
|  | } | 
|  | if strings.HasPrefix(bde.Name(), "mmcblk") { | 
|  | // TODO: MMC information | 
|  | bd.Protocol = api.BlockDevice_MMC | 
|  | c.node.BlockDevice = append(c.node.BlockDevice, &bd) | 
|  | } | 
|  | } | 
|  | return | 
|  | } | 
|  |  | 
// speedModeRegexp extracts the leading decimal number of a link mode name
// that starts with "<number>base".
var speedModeRegexp = regexp.MustCompile(`^([0-9]+)base`)

// mbps is the number of bytes per second corresponding to one megabit per
// second.
const mbps = 1000 * 1000 / 8
|  |  | 
|  | func (c *hwReportContext) gatherNICs() { | 
|  | links, err := netlink.LinkList() | 
|  | if err != nil { | 
|  | c.errors = append(c.errors, fmt.Errorf("failed to list network links: %w", err)) | 
|  | return | 
|  | } | 
|  | ethClient, err := ethtool.New() | 
|  | if err != nil { | 
|  | c.errors = append(c.errors, fmt.Errorf("failed to get ethtool netlink client: %w", err)) | 
|  | return | 
|  | } | 
|  | defer ethClient.Close() | 
|  | for _, l := range links { | 
|  | if l.Type() != "device" || len(l.Attrs().HardwareAddr) == 0 { | 
|  | // Not a physical device, ignore | 
|  | continue | 
|  | } | 
|  | var nif api.NetworkInterface | 
|  | nif.Mac = l.Attrs().HardwareAddr | 
|  | mode, err := ethClient.LinkMode(ethtool.Interface{Index: l.Attrs().Index}) | 
|  | if err == nil { | 
|  | if mode.SpeedMegabits < math.MaxInt32 { | 
|  | nif.CurrentSpeedBytes = int64(mode.SpeedMegabits) * mbps | 
|  | } | 
|  | speeds := make(map[int64]bool) | 
|  | for _, m := range mode.Ours { | 
|  | // Doing this with a regexp is arguably more future-proof as | 
|  | // we don't need to add each link mode for the detection to | 
|  | // work. | 
|  | modeParts := speedModeRegexp.FindStringSubmatch(m.Name) | 
|  | if len(modeParts) > 0 { | 
|  | speedMegabits, err := strconv.ParseInt(modeParts[1], 10, 64) | 
|  | if err != nil { | 
|  | c.errors = append(c.errors, fmt.Errorf("nic %v: failed to parse %q as integer: %w", l.Attrs().Name, modeParts[1], err)) | 
|  | continue | 
|  | } | 
|  | speeds[int64(speedMegabits)*mbps] = true | 
|  | } | 
|  | } | 
|  | for s := range speeds { | 
|  | nif.SupportedSpeedBytes = append(nif.SupportedSpeedBytes, s) | 
|  | } | 
|  | // Go randomizes the map keys, sort to make the report stable. | 
|  | sort.Slice(nif.SupportedSpeedBytes, func(i, j int) bool { return nif.SupportedSpeedBytes[i] > nif.SupportedSpeedBytes[j] }) | 
|  | } | 
|  | state, err := ethClient.LinkState(ethtool.Interface{Index: l.Attrs().Index}) | 
|  | if err == nil { | 
|  | nif.LinkUp = state.Link | 
|  | } else { | 
|  | // We have no ethtool support, fall back to checking if Linux | 
|  | // thinks the link is up. | 
|  | nif.LinkUp = l.Attrs().OperState == netlink.OperUp | 
|  | } | 
|  | // Linux blocks creation of interfaces which conflict with special path | 
|  | // characters, so this path assembly is fine. | 
|  | driverPath, err := os.Readlink("/sys/class/net/" + l.Attrs().Name + "/device/driver") | 
|  | if err == nil { | 
|  | nif.Driver = filepath.Base(driverPath) | 
|  | } | 
|  | c.node.NetworkInterface = append(c.node.NetworkInterface, &nif) | 
|  | } | 
|  | return | 
|  | } | 
|  |  | 
|  | func gatherHWReport() (*api.Node, []error) { | 
|  | hwReportCtx := hwReportContext{ | 
|  | node: &api.Node{}, | 
|  | } | 
|  | hwReportCtx.node.EfiSupport = api.EFISupport_EFI_UNKNOWN | 
|  |  | 
|  | hwReportCtx.gatherCPU() | 
|  | hwReportCtx.gatherSMBIOS() | 
|  | if hwReportCtx.node.MemoryInstalledBytes == 0 { | 
|  | hwReportCtx.gatherMemorySysfs() | 
|  | } | 
|  | var sysinfo unix.Sysinfo_t | 
|  | if err := unix.Sysinfo(&sysinfo); err != nil { | 
|  | hwReportCtx.errors = append(hwReportCtx.errors, fmt.Errorf("unable to execute sysinfo syscall: %w", err)) | 
|  | } else { | 
|  | hwReportCtx.node.MemoryUsableRatio = float32(sysinfo.Totalram) / float32(hwReportCtx.node.MemoryInstalledBytes) | 
|  | } | 
|  | hwReportCtx.gatherNICs() | 
|  | hwReportCtx.gatherBlockDevices() | 
|  |  | 
|  | if _, err := os.Stat("/sys/firmware/efi/runtime"); err == nil { | 
|  | hwReportCtx.node.EfiSupport = api.EFISupport_EFI_ENABLED | 
|  | } | 
|  |  | 
|  | return hwReportCtx.node, hwReportCtx.errors | 
|  | } |