feat: add physical_disk collector (health, wearout, size, OSD mapping)
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
0afa5b0e19
commit
a88c696bfd
1 changed files with 157 additions and 0 deletions
157
collector/physical_disk.go
Normal file
157
collector/physical_disk.go
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"sync"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
)
|
||||
|
||||
func init() {
|
||||
registerCollector("physical_disk", func(logger *slog.Logger) Collector {
|
||||
return newPhysicalDiskCollector(logger)
|
||||
})
|
||||
}
|
||||
|
||||
// physicalDiskCollector gathers physical disk metrics for a configurable set
// of nodes. The node list is replaced through SetNodes and is protected by mu.
type physicalDiskCollector struct {
	logger *slog.Logger

	// mu guards nodes.
	mu sync.Mutex
	// nodes is the collector's private copy of the node names to query.
	nodes []string
}

// newPhysicalDiskCollector returns a collector that uses logger and starts
// with an empty node list.
func newPhysicalDiskCollector(logger *slog.Logger) *physicalDiskCollector {
	return &physicalDiskCollector{logger: logger}
}

// SetNodes replaces the list of nodes to collect from.
//
// The slice is copied before it is stored, so the caller may keep mutating or
// reusing its own slice afterwards without changing (or racing with readers
// of) the collector's node list.
func (c *physicalDiskCollector) SetNodes(nodes []string) {
	// Copy outside the critical section; only the pointer swap needs the lock.
	owned := append([]string(nil), nodes...)

	c.mu.Lock()
	defer c.mu.Unlock()
	c.nodes = owned
}
|
||||
|
||||
type diskListResponse struct {
|
||||
Data []diskEntry `json:"data"`
|
||||
}
|
||||
|
||||
// diskEntry is a single element of the disk list reply. Field names map to
// the JSON keys given in the struct tags.
type diskEntry struct {
	// Identity of the disk.
	DevPath string `json:"devpath"`
	Model   string `json:"model"`
	Serial  string `json:"serial"`
	Type    string `json:"type"`

	// Health is the reported health string; consumers compare it to "PASSED".
	Health string `json:"health"`

	// Wearout is kept raw because the value is not always a JSON number; it is
	// decoded lazily by the consumer.
	Wearout json.RawMessage `json:"wearout"`

	// Size is the disk size as a JSON number.
	Size float64 `json:"size"`

	// Used is the reported usage string for the disk.
	Used string `json:"used"`

	// OSDList holds the OSD identifiers listed under "osdid-list".
	OSDList []string `json:"osdid-list"`
}
|
||||
|
||||
var (
|
||||
physDiskHealthDesc = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "physical_disk", "health"),
|
||||
"1 if SMART health is PASSED, 0 otherwise.",
|
||||
[]string{"node", "devpath", "model", "serial", "type"}, nil,
|
||||
)
|
||||
physDiskWearoutDesc = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "physical_disk", "wearout_remaining_ratio"),
|
||||
"Wearout remaining as a ratio (1.0 = new).",
|
||||
[]string{"node", "devpath"}, nil,
|
||||
)
|
||||
physDiskSizeDesc = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "physical_disk", "size_bytes"),
|
||||
"Physical disk size in bytes.",
|
||||
[]string{"node", "devpath"}, nil,
|
||||
)
|
||||
physDiskInfoDesc = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "physical_disk", "info"),
|
||||
"Physical disk information.",
|
||||
[]string{"node", "devpath", "model", "serial", "type", "used"}, nil,
|
||||
)
|
||||
physDiskOSDDesc = prometheus.NewDesc(
|
||||
prometheus.BuildFQName(namespace, "physical_disk", "osd"),
|
||||
"Disk-to-OSD mapping.",
|
||||
[]string{"node", "devpath", "osd"}, nil,
|
||||
)
|
||||
)
|
||||
|
||||
func (c *physicalDiskCollector) Update(client *Client, ch chan<- prometheus.Metric) error {
|
||||
c.mu.Lock()
|
||||
nodes := make([]string, len(c.nodes))
|
||||
copy(nodes, c.nodes)
|
||||
c.mu.Unlock()
|
||||
|
||||
var (
|
||||
wg sync.WaitGroup
|
||||
errs []error
|
||||
emu sync.Mutex
|
||||
)
|
||||
|
||||
sem := make(chan struct{}, client.MaxConcurrent())
|
||||
|
||||
for _, node := range nodes {
|
||||
wg.Add(1)
|
||||
go func(node string) {
|
||||
defer wg.Done()
|
||||
sem <- struct{}{}
|
||||
defer func() { <-sem }()
|
||||
|
||||
if err := c.collectNode(client, ch, node); err != nil {
|
||||
emu.Lock()
|
||||
errs = append(errs, err)
|
||||
emu.Unlock()
|
||||
}
|
||||
}(node)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
if len(errs) > 0 {
|
||||
return fmt.Errorf("physical_disk collection errors: %v", errs)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *physicalDiskCollector) collectNode(client *Client, ch chan<- prometheus.Metric, node string) error {
|
||||
body, err := client.Get(fmt.Sprintf("/nodes/%s/disks/list", node))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get disks for node %s: %w", node, err)
|
||||
}
|
||||
|
||||
var resp diskListResponse
|
||||
if err := json.Unmarshal(body, &resp); err != nil {
|
||||
return fmt.Errorf("failed to parse disks response for node %s: %w", node, err)
|
||||
}
|
||||
|
||||
for _, disk := range resp.Data {
|
||||
// Health: 1 if PASSED, 0 otherwise.
|
||||
var health float64
|
||||
if disk.Health == "PASSED" {
|
||||
health = 1
|
||||
}
|
||||
ch <- prometheus.MustNewConstMetric(physDiskHealthDesc, prometheus.GaugeValue, health,
|
||||
node, disk.DevPath, disk.Model, disk.Serial, disk.Type)
|
||||
|
||||
// Wearout: try to parse as number. Skip if "N/A" or not a number.
|
||||
if len(disk.Wearout) > 0 {
|
||||
var wearout float64
|
||||
if err := json.Unmarshal(disk.Wearout, &wearout); err == nil {
|
||||
ch <- prometheus.MustNewConstMetric(physDiskWearoutDesc, prometheus.GaugeValue, wearout/100, node, disk.DevPath)
|
||||
}
|
||||
}
|
||||
|
||||
// Size.
|
||||
ch <- prometheus.MustNewConstMetric(physDiskSizeDesc, prometheus.GaugeValue, disk.Size, node, disk.DevPath)
|
||||
|
||||
// Info.
|
||||
ch <- prometheus.MustNewConstMetric(physDiskInfoDesc, prometheus.GaugeValue, 1,
|
||||
node, disk.DevPath, disk.Model, disk.Serial, disk.Type, disk.Used)
|
||||
|
||||
// OSD mapping.
|
||||
for _, osdID := range disk.OSDList {
|
||||
osd := fmt.Sprintf("osd.%s", osdID)
|
||||
ch <- prometheus.MustNewConstMetric(physDiskOSDDesc, prometheus.GaugeValue, 1, node, disk.DevPath, osd)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue