diff --git a/collector/physical_disk.go b/collector/physical_disk.go new file mode 100644 index 0000000..59b8078 --- /dev/null +++ b/collector/physical_disk.go @@ -0,0 +1,157 @@ +package collector + +import ( + "encoding/json" + "fmt" + "log/slog" + "sync" + + "github.com/prometheus/client_golang/prometheus" +) + +func init() { + registerCollector("physical_disk", func(logger *slog.Logger) Collector { + return newPhysicalDiskCollector(logger) + }) +} + +type physicalDiskCollector struct { + logger *slog.Logger + mu sync.Mutex + nodes []string +} + +func newPhysicalDiskCollector(logger *slog.Logger) *physicalDiskCollector { + return &physicalDiskCollector{logger: logger} +} + +func (c *physicalDiskCollector) SetNodes(nodes []string) { + c.mu.Lock() + defer c.mu.Unlock() + c.nodes = nodes +} + +type diskListResponse struct { + Data []diskEntry `json:"data"` +} + +type diskEntry struct { + DevPath string `json:"devpath"` + Health string `json:"health"` + Wearout json.RawMessage `json:"wearout"` + Size float64 `json:"size"` + Model string `json:"model"` + Serial string `json:"serial"` + Type string `json:"type"` + Used string `json:"used"` + OSDList []string `json:"osdid-list"` +} + +var ( + physDiskHealthDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "physical_disk", "health"), + "1 if SMART health is PASSED, 0 otherwise.", + []string{"node", "devpath", "model", "serial", "type"}, nil, + ) + physDiskWearoutDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "physical_disk", "wearout_remaining_ratio"), + "Wearout remaining as a ratio (1.0 = new).", + []string{"node", "devpath"}, nil, + ) + physDiskSizeDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "physical_disk", "size_bytes"), + "Physical disk size in bytes.", + []string{"node", "devpath"}, nil, + ) + physDiskInfoDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "physical_disk", "info"), + "Physical disk information.", + []string{"node", "devpath", "model", "serial", "type", "used"}, nil, + ) + physDiskOSDDesc = prometheus.NewDesc( + prometheus.BuildFQName(namespace, "physical_disk", "osd"), + "Disk-to-OSD mapping.", + []string{"node", "devpath", "osd"}, nil, + ) +) + +func (c *physicalDiskCollector) Update(client *Client, ch chan<- prometheus.Metric) error { + c.mu.Lock() + nodes := make([]string, len(c.nodes)) + copy(nodes, c.nodes) + c.mu.Unlock() + + var ( + wg sync.WaitGroup + errs []error + emu sync.Mutex + ) + + sem := make(chan struct{}, client.MaxConcurrent()) + + for _, node := range nodes { + wg.Add(1) + go func(node string) { + defer wg.Done() + sem <- struct{}{} + defer func() { <-sem }() + + if err := c.collectNode(client, ch, node); err != nil { + emu.Lock() + errs = append(errs, err) + emu.Unlock() + } + }(node) + } + wg.Wait() + + if len(errs) > 0 { + return fmt.Errorf("physical_disk collection errors: %v", errs) + } + return nil +} + +func (c *physicalDiskCollector) collectNode(client *Client, ch chan<- prometheus.Metric, node string) error { + body, err := client.Get(fmt.Sprintf("/nodes/%s/disks/list", node)) + if err != nil { + return fmt.Errorf("failed to get disks for node %s: %w", node, err) + } + + var resp diskListResponse + if err := json.Unmarshal(body, &resp); err != nil { + return fmt.Errorf("failed to parse disks response for node %s: %w", node, err) + } + + for _, disk := range resp.Data { + // Health: 1 if PASSED, 0 otherwise. + var health float64 + if disk.Health == "PASSED" { + health = 1 + } + ch <- prometheus.MustNewConstMetric(physDiskHealthDesc, prometheus.GaugeValue, health, + node, disk.DevPath, disk.Model, disk.Serial, disk.Type) + + // Wearout: try to parse as number. Skip if "N/A" or not a number. + if len(disk.Wearout) > 0 { + var wearout float64 + if err := json.Unmarshal(disk.Wearout, &wearout); err == nil { + ch <- prometheus.MustNewConstMetric(physDiskWearoutDesc, prometheus.GaugeValue, wearout/100, node, disk.DevPath) + } + } + + // Size. + ch <- prometheus.MustNewConstMetric(physDiskSizeDesc, prometheus.GaugeValue, disk.Size, node, disk.DevPath) + + // Info. + ch <- prometheus.MustNewConstMetric(physDiskInfoDesc, prometheus.GaugeValue, 1, + node, disk.DevPath, disk.Model, disk.Serial, disk.Type, disk.Used) + + // OSD mapping. + for _, osdID := range disk.OSDList { + osd := fmt.Sprintf("osd.%s", osdID) + ch <- prometheus.MustNewConstMetric(physDiskOSDDesc, prometheus.GaugeValue, 1, node, disk.DevPath, osd) + } + } + + return nil +}