205 lines
5.5 KiB
Go
205 lines
5.5 KiB
Go
package collector
|
|
|
|
import (
	"encoding/json"
	"errors"
	"fmt"
	"log/slog"
	"strconv"
	"sync"

	"github.com/prometheus/client_golang/prometheus"
)
|
|
|
|
func init() {
|
|
registerCollector("node_status", func(logger *slog.Logger) Collector {
|
|
return newNodeStatusCollector(logger)
|
|
})
|
|
}
|
|
|
|
// nodeStatusCollector gathers per-node status metrics for the set of nodes
// configured through SetNodes.
type nodeStatusCollector struct {
	// logger receives warnings about values that cannot be parsed.
	logger *slog.Logger

	// mu guards nodes.
	mu sync.Mutex

	// nodes holds the names of the nodes whose status is queried on Update.
	nodes []string
}
|
|
|
|
func newNodeStatusCollector(logger *slog.Logger) *nodeStatusCollector {
|
|
return &nodeStatusCollector{logger: logger}
|
|
}
|
|
|
|
func (c *nodeStatusCollector) SetNodes(nodes []string) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
c.nodes = nodes
|
|
}
|
|
|
|
type nodeStatusResponse struct {
|
|
Data nodeStatusData `json:"data"`
|
|
}
|
|
|
|
type nodeStatusData struct {
|
|
LoadAvg []string `json:"loadavg"`
|
|
Swap nodeStatusMem `json:"swap"`
|
|
RootFS nodeStatusFS `json:"rootfs"`
|
|
KSM nodeStatusKSM `json:"ksm"`
|
|
BootInfo nodeStatusBoot `json:"boot-info"`
|
|
}
|
|
|
|
// nodeStatusMem is a total/used/free triple from the status payload.
// The collector uses it for swap and exports the values as byte gauges.
type nodeStatusMem struct {
	Total float64 `json:"total"` // total amount
	Used  float64 `json:"used"`  // amount in use
	Free  float64 `json:"free"`  // amount not in use
}
|
|
|
|
// nodeStatusFS is a total/used/available triple from the status payload.
// The collector uses it for the root filesystem and exports the values as
// byte gauges.
type nodeStatusFS struct {
	Total float64 `json:"total"` // total size
	Used  float64 `json:"used"`  // space in use
	Avail float64 `json:"avail"` // space still available
}
|
|
|
|
// nodeStatusKSM is the "ksm" section of the status payload.
type nodeStatusKSM struct {
	// Shared is exported by the collector as the KSM shared memory gauge.
	Shared float64 `json:"shared"`
}
|
|
|
|
// nodeStatusBoot is the "boot-info" section of the status payload. Both
// fields end up as label values on the boot mode info metric.
type nodeStatusBoot struct {
	// Mode is the boot mode string as reported by the API.
	Mode string `json:"mode"`

	// SecureBoot is the numeric secure boot flag as reported by the API; it
	// is rendered in decimal for the "secureboot" label.
	SecureBoot int `json:"secureboot"`
}
|
|
|
|
var (
|
|
nodeLoad1Desc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "node", "load1"),
|
|
"1-minute load average.",
|
|
[]string{"node"}, nil,
|
|
)
|
|
nodeLoad5Desc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "node", "load5"),
|
|
"5-minute load average.",
|
|
[]string{"node"}, nil,
|
|
)
|
|
nodeLoad15Desc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "node", "load15"),
|
|
"15-minute load average.",
|
|
[]string{"node"}, nil,
|
|
)
|
|
nodeSwapTotalDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "node", "swap_total_bytes"),
|
|
"Total swap in bytes.",
|
|
[]string{"node"}, nil,
|
|
)
|
|
nodeSwapUsedDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "node", "swap_used_bytes"),
|
|
"Used swap in bytes.",
|
|
[]string{"node"}, nil,
|
|
)
|
|
nodeSwapFreeDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "node", "swap_free_bytes"),
|
|
"Free swap in bytes.",
|
|
[]string{"node"}, nil,
|
|
)
|
|
nodeRootfsTotalDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "node", "rootfs_total_bytes"),
|
|
"Root filesystem total size in bytes.",
|
|
[]string{"node"}, nil,
|
|
)
|
|
nodeRootfsUsedDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "node", "rootfs_used_bytes"),
|
|
"Root filesystem used space in bytes.",
|
|
[]string{"node"}, nil,
|
|
)
|
|
nodeRootfsAvailDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "node", "rootfs_available_bytes"),
|
|
"Root filesystem available space in bytes.",
|
|
[]string{"node"}, nil,
|
|
)
|
|
nodeKSMSharedDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "node", "ksm_shared_bytes"),
|
|
"KSM shared memory in bytes.",
|
|
[]string{"node"}, nil,
|
|
)
|
|
nodeBootModeDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "node", "boot_mode_info"),
|
|
"Node boot mode information.",
|
|
[]string{"node", "mode", "secureboot"}, nil,
|
|
)
|
|
)
|
|
|
|
func (c *nodeStatusCollector) Update(client *Client, ch chan<- prometheus.Metric) error {
|
|
c.mu.Lock()
|
|
nodes := make([]string, len(c.nodes))
|
|
copy(nodes, c.nodes)
|
|
c.mu.Unlock()
|
|
|
|
var (
|
|
wg sync.WaitGroup
|
|
errs []error
|
|
emu sync.Mutex
|
|
)
|
|
|
|
sem := make(chan struct{}, client.MaxConcurrent())
|
|
|
|
for _, node := range nodes {
|
|
wg.Add(1)
|
|
go func(node string) {
|
|
defer wg.Done()
|
|
sem <- struct{}{}
|
|
defer func() { <-sem }()
|
|
|
|
if err := c.collectNode(client, ch, node); err != nil {
|
|
emu.Lock()
|
|
errs = append(errs, err)
|
|
emu.Unlock()
|
|
}
|
|
}(node)
|
|
}
|
|
wg.Wait()
|
|
|
|
if len(errs) > 0 {
|
|
return fmt.Errorf("node_status collection errors: %v", errs)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (c *nodeStatusCollector) collectNode(client *Client, ch chan<- prometheus.Metric, node string) error {
|
|
body, err := client.Get(fmt.Sprintf("/nodes/%s/status", node))
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get status for node %s: %w", node, err)
|
|
}
|
|
|
|
var resp nodeStatusResponse
|
|
if err := json.Unmarshal(body, &resp); err != nil {
|
|
return fmt.Errorf("failed to parse status response for node %s: %w", node, err)
|
|
}
|
|
|
|
d := resp.Data
|
|
|
|
// Load averages (strings in API).
|
|
if len(d.LoadAvg) >= 3 {
|
|
for i, desc := range []*prometheus.Desc{nodeLoad1Desc, nodeLoad5Desc, nodeLoad15Desc} {
|
|
val, err := strconv.ParseFloat(d.LoadAvg[i], 64)
|
|
if err != nil {
|
|
c.logger.Warn("failed to parse load average", "node", node, "index", i, "err", err)
|
|
continue
|
|
}
|
|
ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, val, node)
|
|
}
|
|
}
|
|
|
|
// Swap.
|
|
ch <- prometheus.MustNewConstMetric(nodeSwapTotalDesc, prometheus.GaugeValue, d.Swap.Total, node)
|
|
ch <- prometheus.MustNewConstMetric(nodeSwapUsedDesc, prometheus.GaugeValue, d.Swap.Used, node)
|
|
ch <- prometheus.MustNewConstMetric(nodeSwapFreeDesc, prometheus.GaugeValue, d.Swap.Free, node)
|
|
|
|
// Root filesystem.
|
|
ch <- prometheus.MustNewConstMetric(nodeRootfsTotalDesc, prometheus.GaugeValue, d.RootFS.Total, node)
|
|
ch <- prometheus.MustNewConstMetric(nodeRootfsUsedDesc, prometheus.GaugeValue, d.RootFS.Used, node)
|
|
ch <- prometheus.MustNewConstMetric(nodeRootfsAvailDesc, prometheus.GaugeValue, d.RootFS.Avail, node)
|
|
|
|
// KSM.
|
|
ch <- prometheus.MustNewConstMetric(nodeKSMSharedDesc, prometheus.GaugeValue, d.KSM.Shared, node)
|
|
|
|
// Boot mode info.
|
|
secureboot := strconv.Itoa(d.BootInfo.SecureBoot)
|
|
ch <- prometheus.MustNewConstMetric(nodeBootModeDesc, prometheus.GaugeValue, 1, node, d.BootInfo.Mode, secureboot)
|
|
|
|
return nil
|
|
}
|