feat: add physical_disk collector (health, wearout, size, OSD mapping)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Davíð Steinn Geirsson 2026-03-20 15:33:46 +00:00
parent 0afa5b0e19
commit a88c696bfd

157
collector/physical_disk.go Normal file
View file

@ -0,0 +1,157 @@
package collector
import (
"encoding/json"
"fmt"
"log/slog"
"sync"
"github.com/prometheus/client_golang/prometheus"
)
func init() {
registerCollector("physical_disk", func(logger *slog.Logger) Collector {
return newPhysicalDiskCollector(logger)
})
}
// physicalDiskCollector emits per-node physical disk metrics for the set of
// nodes most recently supplied through SetNodes.
type physicalDiskCollector struct {
	logger *slog.Logger

	// mu guards nodes.
	mu sync.Mutex
	// nodes holds the node names whose disks are queried by Update.
	nodes []string
}

// newPhysicalDiskCollector returns a collector that uses logger and starts
// with no nodes configured.
func newPhysicalDiskCollector(logger *slog.Logger) *physicalDiskCollector {
	return &physicalDiskCollector{logger: logger}
}

// SetNodes replaces the list of nodes to query.
//
// The collector keeps its own copy of nodes: storing the caller's slice
// directly would let a later write by the caller change (and race with) the
// collector's state even though access to the field itself is locked.
func (c *physicalDiskCollector) SetNodes(nodes []string) {
	// Copy outside the critical section; only the assignment needs the lock.
	snapshot := append([]string(nil), nodes...)

	c.mu.Lock()
	defer c.mu.Unlock()
	c.nodes = snapshot
}
// diskListResponse is the JSON envelope decoded from the
// /nodes/<node>/disks/list response; the disk entries live under "data".
type diskListResponse struct {
Data []diskEntry `json:"data"`
}
// diskEntry is a single disk as decoded from the "data" array of a disk list
// response.
type diskEntry struct {
// DevPath is used as the "devpath" label on every emitted disk metric.
DevPath string `json:"devpath"`
// Health is compared against "PASSED" to derive the health gauge.
Health string `json:"health"`
// Wearout is kept raw because the value is not always a number; it is
// decoded separately and skipped when it does not parse as one.
Wearout json.RawMessage `json:"wearout"`
// Size is exported as the size_bytes gauge.
Size float64 `json:"size"`
// Model, Serial and Type are exported as labels on the health and info
// metrics.
Model string `json:"model"`
Serial string `json:"serial"`
Type string `json:"type"`
// Used is exported as the "used" label on the info metric.
Used string `json:"used"`
// OSDList holds OSD identifiers; each one is exported as "osd.<id>" in the
// disk-to-OSD mapping metric.
OSDList []string `json:"osdid-list"`
}
// Metric descriptors for the physical_disk collector. Every metric carries
// the "node" and "devpath" labels.
var (
// physDiskHealthDesc describes a gauge that is 1 when the reported health
// string is "PASSED" and 0 for any other value.
physDiskHealthDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "physical_disk", "health"),
"1 if SMART health is PASSED, 0 otherwise.",
[]string{"node", "devpath", "model", "serial", "type"}, nil,
)
// physDiskWearoutDesc describes a gauge holding the reported wearout value
// divided by 100; it is only emitted when the wearout value is numeric.
physDiskWearoutDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "physical_disk", "wearout_remaining_ratio"),
"Wearout remaining as a ratio (1.0 = new).",
[]string{"node", "devpath"}, nil,
)
// physDiskSizeDesc describes a gauge holding the reported disk size.
physDiskSizeDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "physical_disk", "size_bytes"),
"Physical disk size in bytes.",
[]string{"node", "devpath"}, nil,
)
// physDiskInfoDesc describes a constant-1 gauge whose labels carry the
// descriptive attributes of the disk.
physDiskInfoDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "physical_disk", "info"),
"Physical disk information.",
[]string{"node", "devpath", "model", "serial", "type", "used"}, nil,
)
// physDiskOSDDesc describes a constant-1 gauge emitted once per OSD listed
// for a disk, with the OSD name in the "osd" label.
physDiskOSDDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "physical_disk", "osd"),
"Disk-to-OSD mapping.",
[]string{"node", "devpath", "osd"}, nil,
)
)
// Update collects disk metrics for every configured node and sends them on
// ch. Nodes are processed in separate goroutines, with the number running
// collectNode at once bounded by client.MaxConcurrent(). It returns nil when
// every node succeeded, otherwise a single error listing all per-node
// failures.
func (c *physicalDiskCollector) Update(client *Client, ch chan<- prometheus.Metric) error {
	// Snapshot the node list so the lock is not held while requests run.
	c.mu.Lock()
	targets := append(make([]string, 0, len(c.nodes)), c.nodes...)
	c.mu.Unlock()

	var (
		pending  sync.WaitGroup
		failMu   sync.Mutex // guards failures
		failures []error
	)

	// Buffered channel used as a counting semaphore.
	// NOTE(review): assumes client.MaxConcurrent() is positive; a capacity of
	// 0 would make every worker block on acquire — confirm.
	slots := make(chan struct{}, client.MaxConcurrent())

	worker := func(name string) {
		defer pending.Done()

		slots <- struct{}{}
		defer func() { <-slots }()

		err := c.collectNode(client, ch, name)
		if err == nil {
			return
		}
		failMu.Lock()
		defer failMu.Unlock()
		failures = append(failures, err)
	}

	pending.Add(len(targets))
	for _, name := range targets {
		go worker(name)
	}
	pending.Wait()

	if len(failures) == 0 {
		return nil
	}
	return fmt.Errorf("physical_disk collection errors: %v", failures)
}
// collectNode fetches /nodes/<node>/disks/list through client, decodes the
// response and sends the health, wearout, size, info and OSD-mapping metrics
// for each listed disk on ch.
//
// It returns an error when the request fails or the response cannot be
// decoded; in that case no metrics are sent for the node.
func (c *physicalDiskCollector) collectNode(client *Client, ch chan<- prometheus.Metric, node string) error {
	body, err := client.Get(fmt.Sprintf("/nodes/%s/disks/list", node))
	if err != nil {
		return fmt.Errorf("failed to get disks for node %s: %w", node, err)
	}

	var resp diskListResponse
	if err := json.Unmarshal(body, &resp); err != nil {
		return fmt.Errorf("failed to parse disks response for node %s: %w", node, err)
	}

	for _, disk := range resp.Data {
		// Health: 1 if PASSED, 0 otherwise.
		var health float64
		if disk.Health == "PASSED" {
			health = 1
		}
		ch <- prometheus.MustNewConstMetric(physDiskHealthDesc, prometheus.GaugeValue, health,
			node, disk.DevPath, disk.Model, disk.Serial, disk.Type)

		// Wearout: emit only when the value is a JSON number. Non-numeric
		// values such as "N/A" fail to decode and are skipped. Decoding into
		// a pointer also skips JSON null, which would otherwise decode
		// without error into a plain float64 and be reported as 0 remaining.
		if len(disk.Wearout) > 0 {
			var wearout *float64
			if err := json.Unmarshal(disk.Wearout, &wearout); err == nil && wearout != nil {
				ch <- prometheus.MustNewConstMetric(physDiskWearoutDesc, prometheus.GaugeValue, *wearout/100, node, disk.DevPath)
			}
		}

		// Size.
		ch <- prometheus.MustNewConstMetric(physDiskSizeDesc, prometheus.GaugeValue, disk.Size, node, disk.DevPath)

		// Info: constant 1, the descriptive attributes travel as labels.
		ch <- prometheus.MustNewConstMetric(physDiskInfoDesc, prometheus.GaugeValue, 1,
			node, disk.DevPath, disk.Model, disk.Serial, disk.Type, disk.Used)

		// OSD mapping: one sample per OSD listed for the disk.
		for _, osdID := range disk.OSDList {
			osd := fmt.Sprintf("osd.%s", osdID)
			ch <- prometheus.MustNewConstMetric(physDiskOSDDesc, prometheus.GaugeValue, 1, node, disk.DevPath, osd)
		}
	}
	return nil
}