Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
178 lines
3.8 KiB
Go
178 lines
3.8 KiB
Go
package collector
|
|
|
|
import (
	"encoding/json"
	"errors"
	"fmt"
	"log/slog"
	"strconv"
	"sync"

	"github.com/prometheus/client_golang/prometheus"
)
|
|
|
|
func init() {
|
|
registerCollector("node_config", func(logger *slog.Logger) Collector {
|
|
return newNodeConfigCollector(logger)
|
|
})
|
|
}
|
|
|
|
// nodeConfigCollector exports per-guest configuration metrics for the nodes
// it has been given through SetNodes.
type nodeConfigCollector struct {
	// logger is the logger handed to the constructor.
	logger *slog.Logger

	// mu guards nodes.
	mu sync.Mutex
	// nodes holds the node names whose guests are inspected on each Update.
	nodes []string
}
|
|
|
|
func newNodeConfigCollector(logger *slog.Logger) *nodeConfigCollector {
|
|
return &nodeConfigCollector{logger: logger}
|
|
}
|
|
|
|
func (c *nodeConfigCollector) SetNodes(nodes []string) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
c.nodes = nodes
|
|
}
|
|
|
|
type vmListResponse struct {
|
|
Data []vmListEntry `json:"data"`
|
|
}
|
|
|
|
// vmListEntry is a single guest in a vmListResponse. Only the numeric guest
// ID is decoded; any other fields in the payload are ignored.
type vmListEntry struct {
	// VMID is the guest's numeric identifier.
	VMID int `json:"vmid"`
}
|
|
|
|
type vmConfigResponse struct {
|
|
Data vmConfigData `json:"data"`
|
|
}
|
|
|
|
// vmConfigData is the part of a guest configuration this collector reads.
type vmConfigData struct {
	// Onboot is a pointer so that a missing or null "onboot" key decodes to
	// nil instead of 0; the collector reports nil as 0.
	Onboot *int `json:"onboot"`
}
|
|
|
|
var (
|
|
onbootStatusDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "onboot_status"),
|
|
"Whether a guest is configured to start on boot.",
|
|
[]string{"id", "node", "type"},
|
|
nil,
|
|
)
|
|
)
|
|
|
|
func (c *nodeConfigCollector) Update(client *Client, ch chan<- prometheus.Metric) error {
|
|
c.mu.Lock()
|
|
nodes := make([]string, len(c.nodes))
|
|
copy(nodes, c.nodes)
|
|
c.mu.Unlock()
|
|
|
|
var (
|
|
wg sync.WaitGroup
|
|
errs []error
|
|
emu sync.Mutex
|
|
)
|
|
|
|
sem := make(chan struct{}, client.MaxConcurrent())
|
|
|
|
for _, node := range nodes {
|
|
wg.Add(1)
|
|
go func(node string) {
|
|
defer wg.Done()
|
|
sem <- struct{}{}
|
|
defer func() { <-sem }()
|
|
|
|
if err := c.collectNode(client, ch, node, sem); err != nil {
|
|
emu.Lock()
|
|
errs = append(errs, err)
|
|
emu.Unlock()
|
|
}
|
|
}(node)
|
|
}
|
|
wg.Wait()
|
|
|
|
if len(errs) > 0 {
|
|
return fmt.Errorf("node_config collection errors: %v", errs)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (c *nodeConfigCollector) collectNode(client *Client, ch chan<- prometheus.Metric, node string, sem chan struct{}) error {
|
|
// Fetch QEMU VMs.
|
|
qemuBody, err := client.Get(fmt.Sprintf("/nodes/%s/qemu", node))
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get qemu list for node %s: %w", node, err)
|
|
}
|
|
|
|
var qemuResp vmListResponse
|
|
if err := json.Unmarshal(qemuBody, &qemuResp); err != nil {
|
|
return fmt.Errorf("failed to parse qemu list for node %s: %w", node, err)
|
|
}
|
|
|
|
// Fetch LXC containers.
|
|
lxcBody, err := client.Get(fmt.Sprintf("/nodes/%s/lxc", node))
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get lxc list for node %s: %w", node, err)
|
|
}
|
|
|
|
var lxcResp vmListResponse
|
|
if err := json.Unmarshal(lxcBody, &lxcResp); err != nil {
|
|
return fmt.Errorf("failed to parse lxc list for node %s: %w", node, err)
|
|
}
|
|
|
|
// Collect configs for all VMs/CTs in parallel.
|
|
type configJob struct {
|
|
vmType string
|
|
vmid int
|
|
}
|
|
|
|
var jobs []configJob
|
|
for _, vm := range qemuResp.Data {
|
|
jobs = append(jobs, configJob{vmType: "qemu", vmid: vm.VMID})
|
|
}
|
|
for _, ct := range lxcResp.Data {
|
|
jobs = append(jobs, configJob{vmType: "lxc", vmid: ct.VMID})
|
|
}
|
|
|
|
var (
|
|
wg sync.WaitGroup
|
|
errs []error
|
|
emu sync.Mutex
|
|
)
|
|
|
|
for _, job := range jobs {
|
|
wg.Add(1)
|
|
go func(j configJob) {
|
|
defer wg.Done()
|
|
sem <- struct{}{}
|
|
defer func() { <-sem }()
|
|
|
|
path := fmt.Sprintf("/nodes/%s/%s/%d/config", node, j.vmType, j.vmid)
|
|
body, err := client.Get(path)
|
|
if err != nil {
|
|
emu.Lock()
|
|
errs = append(errs, fmt.Errorf("failed to get config %s: %w", path, err))
|
|
emu.Unlock()
|
|
return
|
|
}
|
|
|
|
var resp vmConfigResponse
|
|
if err := json.Unmarshal(body, &resp); err != nil {
|
|
emu.Lock()
|
|
errs = append(errs, fmt.Errorf("failed to parse config %s: %w", path, err))
|
|
emu.Unlock()
|
|
return
|
|
}
|
|
|
|
var onboot float64
|
|
if resp.Data.Onboot != nil {
|
|
onboot = float64(*resp.Data.Onboot)
|
|
}
|
|
|
|
id := j.vmType + "/" + strconv.Itoa(j.vmid)
|
|
ch <- prometheus.MustNewConstMetric(onbootStatusDesc, prometheus.GaugeValue, onboot, id, node, j.vmType)
|
|
}(job)
|
|
}
|
|
wg.Wait()
|
|
|
|
if len(errs) > 0 {
|
|
return fmt.Errorf("config fetch errors: %v", errs)
|
|
}
|
|
return nil
|
|
}
|