pve-exporter/collector/cluster_resources.go
Davíð Steinn Geirsson a62264edf8 feat: add cluster_resources collector (16 metrics)
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-20 11:33:03 +00:00

240 lines
8 KiB
Go

package collector
import (
"encoding/json"
"fmt"
"log/slog"
"strconv"
"github.com/prometheus/client_golang/prometheus"
)
func init() {
registerCollector("cluster_resources", func(logger *slog.Logger) Collector {
return newClusterResourcesCollector(logger)
})
}
// clusterResourcesCollector turns the entries returned by the
// /cluster/resources endpoint into Prometheus metrics.
type clusterResourcesCollector struct {
	// logger is the logger supplied at construction time.
	logger *slog.Logger
}
// newClusterResourcesCollector returns a collector that keeps a reference to
// the given logger.
func newClusterResourcesCollector(logger *slog.Logger) *clusterResourcesCollector {
	collector := new(clusterResourcesCollector)
	collector.logger = logger
	return collector
}
// resourceEntry is one element of the "data" array in a /cluster/resources
// response. A single struct is used for every resource type, so fields that
// are absent from a given entry keep their zero value after decoding.
type resourceEntry struct {
	// Identity and state. Type selects which emit function handles the entry.
	Type   string `json:"type"`
	ID     string `json:"id"`
	Node   string `json:"node"`
	Name   string `json:"name"`
	Status string `json:"status"`
	VMID   int    `json:"vmid"`

	// Usage figures, exported as gauges.
	CPU     float64 `json:"cpu"`
	MaxCPU  float64 `json:"maxcpu"`
	Mem     float64 `json:"mem"`
	MaxMem  float64 `json:"maxmem"`
	Disk    float64 `json:"disk"`
	MaxDisk float64 `json:"maxdisk"`
	Uptime  float64 `json:"uptime"`

	// Traffic and disk I/O totals, exported as counters for guests.
	NetIn     float64 `json:"netin"`
	NetOut    float64 `json:"netout"`
	DiskRead  float64 `json:"diskread"`
	DiskWrite float64 `json:"diskwrite"`

	// Guest attributes. Template is rendered as a decimal string label;
	// HAState and Lock are only exported when non-empty.
	Template int    `json:"template"`
	HAState  string `json:"hastate"`
	Tags     string `json:"tags"`
	Lock     string `json:"lock"`

	// Storage attributes. Shared is converted to a float for the
	// storage_shared gauge.
	Storage    string `json:"storage"`
	PluginType string `json:"plugintype"`
	Content    string `json:"content"`
	Shared     int    `json:"shared"`
}
// resourceResponse is the top-level JSON object of a /cluster/resources
// reply; the entries live under its "data" key.
type resourceResponse struct {
	Data []resourceEntry `json:"data"`
}
// Metric descriptors emitted by the cluster_resources collector. Every metric
// carries the resource "id" as its first label.
var (
	// Status and utilisation gauges.
	upDesc               = clusterResourceDesc("up", "Whether the resource is up (1) or down (0).", "id")
	cpuUsageRatioDesc    = clusterResourceDesc("cpu_usage_ratio", "CPU usage ratio.", "id")
	cpuUsageLimitDesc    = clusterResourceDesc("cpu_usage_limit", "CPU usage limit (number of CPUs).", "id")
	memoryUsageBytesDesc = clusterResourceDesc("memory_usage_bytes", "Memory usage in bytes.", "id")
	memorySizeBytesDesc  = clusterResourceDesc("memory_size_bytes", "Memory size in bytes.", "id")
	diskUsageBytesDesc   = clusterResourceDesc("disk_usage_bytes", "Disk usage in bytes.", "id")
	diskSizeBytesDesc    = clusterResourceDesc("disk_size_bytes", "Disk size in bytes.", "id")
	uptimeSecondsDesc    = clusterResourceDesc("uptime_seconds", "Uptime in seconds.", "id")

	// Network and disk I/O totals.
	networkTransmitBytesTotalDesc = clusterResourceDesc("network_transmit_bytes_total", "Total bytes transmitted over the network.", "id")
	networkReceiveBytesTotalDesc  = clusterResourceDesc("network_receive_bytes_total", "Total bytes received over the network.", "id")
	diskWrittenBytesTotalDesc     = clusterResourceDesc("disk_written_bytes_total", "Total bytes written to disk.", "id")
	diskReadBytesTotalDesc        = clusterResourceDesc("disk_read_bytes_total", "Total bytes read from disk.", "id")

	// Guest metadata and state.
	guestInfoDesc = clusterResourceDesc("guest_info", "Information about a guest (VM or container).",
		"id", "node", "name", "type", "template", "tags")
	haStateDesc   = clusterResourceDesc("ha_state", "HA manager state of the resource.", "id", "state")
	lockStateDesc = clusterResourceDesc("lock_state", "Lock state of the resource.", "id", "state")

	// Storage metadata.
	storageSharedDesc = clusterResourceDesc("storage_shared", "Whether the storage is shared (1) or local (0).", "id")
	storageInfoDesc   = clusterResourceDesc("storage_info", "Information about a storage resource.",
		"id", "node", "storage", "plugintype", "content")
)

// clusterResourceDesc builds a descriptor named after the exporter namespace
// and the given metric name (no subsystem), with the given variable labels and
// no constant labels.
func clusterResourceDesc(name, help string, labels ...string) *prometheus.Desc {
	return prometheus.NewDesc(prometheus.BuildFQName(namespace, "", name), help, labels, nil)
}
// Update fetches /cluster/resources through client, decodes the reply and
// sends the metrics for every node, guest (qemu or lxc) and storage entry to
// ch. Entries of any other type are skipped.
//
// It returns a wrapped error when the request fails or the body cannot be
// decoded; in both cases nothing has been sent to ch.
func (c *clusterResourcesCollector) Update(client *Client, ch chan<- prometheus.Metric) error {
	raw, err := client.Get("/cluster/resources")
	if err != nil {
		return fmt.Errorf("failed to get /cluster/resources: %w", err)
	}

	var parsed resourceResponse
	if decodeErr := json.Unmarshal(raw, &parsed); decodeErr != nil {
		return fmt.Errorf("failed to parse /cluster/resources response: %w", decodeErr)
	}

	for _, res := range parsed.Data {
		switch kind := res.Type; {
		case kind == "node":
			c.emitNode(ch, res)
		case kind == "qemu" || kind == "lxc":
			c.emitGuest(ch, res)
		case kind == "storage":
			c.emitStorage(ch, res)
		}
	}

	return nil
}
// emitNode sends the gauges for a node entry to ch, each labelled with the
// entry ID. The "up" gauge is 1 only when the status is "online".
func (c *clusterResourcesCollector) emitNode(ch chan<- prometheus.Metric, e resourceEntry) {
	gauge := func(desc *prometheus.Desc, value float64) {
		ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, value, e.ID)
	}

	online := 0.0
	if e.Status == "online" {
		online = 1
	}

	gauge(upDesc, online)
	gauge(cpuUsageRatioDesc, e.CPU)
	gauge(cpuUsageLimitDesc, e.MaxCPU)
	gauge(memoryUsageBytesDesc, e.Mem)
	gauge(memorySizeBytesDesc, e.MaxMem)
	gauge(diskUsageBytesDesc, e.Disk)
	gauge(diskSizeBytesDesc, e.MaxDisk)
	gauge(uptimeSecondsDesc, e.Uptime)
}
// emitGuest sends the metrics for a guest entry (qemu or lxc) to ch: the
// status and utilisation gauges, the network and disk I/O counters, a
// guest_info series, and ha_state / lock_state series when those fields are
// non-empty. The "up" gauge is 1 only when the status is "running".
func (c *clusterResourcesCollector) emitGuest(ch chan<- prometheus.Metric, e resourceEntry) {
	running := 0.0
	if e.Status == "running" {
		running = 1
	}

	// Series that carry only the "id" label, in emission order.
	samples := []struct {
		desc  *prometheus.Desc
		kind  prometheus.ValueType
		value float64
	}{
		{upDesc, prometheus.GaugeValue, running},
		{cpuUsageRatioDesc, prometheus.GaugeValue, e.CPU},
		{cpuUsageLimitDesc, prometheus.GaugeValue, e.MaxCPU},
		{memoryUsageBytesDesc, prometheus.GaugeValue, e.Mem},
		{memorySizeBytesDesc, prometheus.GaugeValue, e.MaxMem},
		{diskUsageBytesDesc, prometheus.GaugeValue, e.Disk},
		{diskSizeBytesDesc, prometheus.GaugeValue, e.MaxDisk},
		{uptimeSecondsDesc, prometheus.GaugeValue, e.Uptime},
		{networkTransmitBytesTotalDesc, prometheus.CounterValue, e.NetOut},
		{networkReceiveBytesTotalDesc, prometheus.CounterValue, e.NetIn},
		{diskWrittenBytesTotalDesc, prometheus.CounterValue, e.DiskWrite},
		{diskReadBytesTotalDesc, prometheus.CounterValue, e.DiskRead},
	}
	for _, s := range samples {
		ch <- prometheus.MustNewConstMetric(s.desc, s.kind, s.value, e.ID)
	}

	templateLabel := strconv.Itoa(e.Template)
	ch <- prometheus.MustNewConstMetric(guestInfoDesc, prometheus.GaugeValue, 1,
		e.ID, e.Node, e.Name, e.Type, templateLabel, e.Tags)

	if ha := e.HAState; ha != "" {
		ch <- prometheus.MustNewConstMetric(haStateDesc, prometheus.GaugeValue, 1, e.ID, ha)
	}
	if lock := e.Lock; lock != "" {
		ch <- prometheus.MustNewConstMetric(lockStateDesc, prometheus.GaugeValue, 1, e.ID, lock)
	}
}
// emitStorage sends the metrics for a storage entry to ch: disk usage and
// size, the shared flag converted to a float, and a storage_info series.
func (c *clusterResourcesCollector) emitStorage(ch chan<- prometheus.Metric, e resourceEntry) {
	id := e.ID

	used := prometheus.MustNewConstMetric(diskUsageBytesDesc, prometheus.GaugeValue, e.Disk, id)
	ch <- used

	size := prometheus.MustNewConstMetric(diskSizeBytesDesc, prometheus.GaugeValue, e.MaxDisk, id)
	ch <- size

	shared := prometheus.MustNewConstMetric(storageSharedDesc, prometheus.GaugeValue, float64(e.Shared), id)
	ch <- shared

	info := prometheus.MustNewConstMetric(storageInfoDesc, prometheus.GaugeValue, 1,
		id, e.Node, e.Storage, e.PluginType, e.Content)
	ch <- info
}