pve-exporter/collector/node_config.go

package collector

import (
	"encoding/json"
	"fmt"
	"log/slog"
	"strconv"
	"sync"

	"github.com/prometheus/client_golang/prometheus"
)
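
// Register under the name "node_config" so the exporter's collector
// registry can enable this collector by name alongside the others.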
func init() {
	registerCollector("node_config", func(logger *slog.Logger) Collector {
		return newNodeConfigCollector(logger)
	})
}
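
// nodeConfigCollector exports per-guest configuration flags read from the
// Proxmox VE API, currently only the onboot flag of QEMU VMs and LXC
// containers on the nodes set via SetNodes.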
type nodeConfigCollector struct {
	logger *slog.Logger
	mu     sync.Mutex
	nodes  []string
}
func newNodeConfigCollector(logger *slog.Logger) *nodeConfigCollector {
	return &nodeConfigCollector{logger: logger}
}
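
// SetNodes replaces the list of cluster nodes to scrape on the next
// Update. It is safe to call concurrently with Update.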
func (c *nodeConfigCollector) SetNodes(nodes []string) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.nodes = nodes
}
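
// The PVE list endpoints wrap their payload in a "data" envelope; only
// vmid is decoded here. Abridged sketch of a GET /nodes/{node}/qemu
// response (the exact field set is illustrative):
//
//	{"data": [{"vmid": 100, "name": "web01", "status": "running"}]}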
type vmListResponse struct {
	Data []vmListEntry `json:"data"`
}

type vmListEntry struct {
	VMID int `json:"vmid"`
}
type vmConfigResponse struct {
	Data vmConfigData `json:"data"`
}
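
// Onboot is a pointer so that a config with no onboot key (the PVE
// default, meaning the guest does not start on boot) is distinguishable
// from an explicit 0. Abridged sketch of a config response, with
// illustrative extra fields:
//
//	{"data": {"onboot": 1, "cores": 2, "memory": 2048}}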
type vmConfigData struct {
	Onboot *int `json:"onboot"`
}
var onbootStatusDesc = prometheus.NewDesc(
	prometheus.BuildFQName(namespace, "", "onboot_status"),
	"Whether a guest is configured to start on boot.",
	[]string{"id", "node", "type"},
	nil,
)
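
// Example exposition, assuming the exporter's namespace is "pve" and
// hypothetical guests 100 (qemu) and 200 (lxc):
//
//	pve_onboot_status{id="qemu/100",node="pve1",type="qemu"} 1
//	pve_onboot_status{id="lxc/200",node="pve1",type="lxc"} 0
//
// A PromQL sketch for guests that would not come back after a host
// reboot: pve_onboot_status == 0.

// Update snapshots the node list, fans out one goroutine per node, and
// bounds the total number of in-flight API requests with a semaphore
// sized by client.MaxConcurrent().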
func (c *nodeConfigCollector) Update(client *Client, ch chan<- prometheus.Metric) error {
	c.mu.Lock()
	nodes := make([]string, len(c.nodes))
	copy(nodes, c.nodes)
	c.mu.Unlock()

	var (
		wg   sync.WaitGroup
		errs []error
		emu  sync.Mutex
	)
	sem := make(chan struct{}, client.MaxConcurrent())
	for _, node := range nodes {
		wg.Add(1)
		// The per-node goroutine must not hold a semaphore slot itself:
		// collectNode blocks waiting on its own config-fetch goroutines,
		// which also need slots, so holding one here would deadlock once
		// the node count reaches MaxConcurrent. Slots are instead
		// acquired around each individual request inside collectNode.
		go func(node string) {
			defer wg.Done()
			if err := c.collectNode(client, ch, node, sem); err != nil {
				emu.Lock()
				errs = append(errs, err)
				emu.Unlock()
			}
		}(node)
	}
	wg.Wait()
	if len(errs) > 0 {
		return fmt.Errorf("node_config collection errors: %v", errs)
	}
	return nil
}
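
// collectNode lists the QEMU VMs and LXC containers on one node, fetches
// each guest's config, and emits one onboot_status sample per guest.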
func (c *nodeConfigCollector) collectNode(client *Client, ch chan<- prometheus.Metric, node string, sem chan struct{}) error {
	// Fetch QEMU VMs, holding a semaphore slot only for the duration of
	// the request, never across the wait on config fetches below.
	sem <- struct{}{}
	qemuBody, err := client.Get(fmt.Sprintf("/nodes/%s/qemu", node))
	<-sem
	if err != nil {
		return fmt.Errorf("failed to get qemu list for node %s: %w", node, err)
	}
	var qemuResp vmListResponse
	if err := json.Unmarshal(qemuBody, &qemuResp); err != nil {
		return fmt.Errorf("failed to parse qemu list for node %s: %w", node, err)
	}

	// Fetch LXC containers.
	sem <- struct{}{}
	lxcBody, err := client.Get(fmt.Sprintf("/nodes/%s/lxc", node))
	<-sem
	if err != nil {
		return fmt.Errorf("failed to get lxc list for node %s: %w", node, err)
	}
	var lxcResp vmListResponse
	if err := json.Unmarshal(lxcBody, &lxcResp); err != nil {
		return fmt.Errorf("failed to parse lxc list for node %s: %w", node, err)
	}
	// Collect configs for all VMs/CTs in parallel.
	type configJob struct {
		vmType string
		vmid   int
	}
	var jobs []configJob
	for _, vm := range qemuResp.Data {
		jobs = append(jobs, configJob{vmType: "qemu", vmid: vm.VMID})
	}
	for _, ct := range lxcResp.Data {
		jobs = append(jobs, configJob{vmType: "lxc", vmid: ct.VMID})
	}

	var (
		wg   sync.WaitGroup
		errs []error
		emu  sync.Mutex
	)
	for _, job := range jobs {
		wg.Add(1)
		go func(j configJob) {
			defer wg.Done()
			sem <- struct{}{}
			defer func() { <-sem }()
			path := fmt.Sprintf("/nodes/%s/%s/%d/config", node, j.vmType, j.vmid)
			body, err := client.Get(path)
			if err != nil {
				emu.Lock()
				errs = append(errs, fmt.Errorf("failed to get config %s: %w", path, err))
				emu.Unlock()
				return
			}
			var resp vmConfigResponse
			if err := json.Unmarshal(body, &resp); err != nil {
				emu.Lock()
				errs = append(errs, fmt.Errorf("failed to parse config %s: %w", path, err))
				emu.Unlock()
				return
			}
			var onboot float64
			if resp.Data.Onboot != nil {
				onboot = float64(*resp.Data.Onboot)
			}
			id := j.vmType + "/" + strconv.Itoa(j.vmid)
			ch <- prometheus.MustNewConstMetric(onbootStatusDesc, prometheus.GaugeValue, onboot, id, node, j.vmType)
		}(job)
	}
	wg.Wait()
	if len(errs) > 0 {
		return fmt.Errorf("config fetch errors: %v", errs)
	}
	return nil
}