Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
240 lines
8 KiB
Go
240 lines
8 KiB
Go
package collector
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"log/slog"
|
|
"strconv"
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
)
|
|
|
|
func init() {
|
|
registerCollector("cluster_resources", func(logger *slog.Logger) Collector {
|
|
return newClusterResourcesCollector(logger)
|
|
})
|
|
}
|
|
|
|
// clusterResourcesCollector turns the entries returned by the
// /cluster/resources endpoint into Prometheus metrics. Its only state is the
// logger supplied at construction time.
type clusterResourcesCollector struct {
	// logger is the structured logger handed to the constructor.
	logger *slog.Logger
}
|
|
|
|
func newClusterResourcesCollector(logger *slog.Logger) *clusterResourcesCollector {
|
|
return &clusterResourcesCollector{logger: logger}
|
|
}
|
|
|
|
// resourceEntry is one element of the "data" array in a /cluster/resources
// response. A single struct is used for every resource kind; which fields
// are actually read depends on Type (see the emit* methods).
type resourceEntry struct {
	// Identity and status fields.
	Type   string `json:"type"`
	ID     string `json:"id"`
	Node   string `json:"node"`
	Name   string `json:"name"`
	Status string `json:"status"`
	VMID   int    `json:"vmid"`

	// Usage and capacity figures, exported as gauges.
	CPU     float64 `json:"cpu"`
	MaxCPU  float64 `json:"maxcpu"`
	Mem     float64 `json:"mem"`
	MaxMem  float64 `json:"maxmem"`
	Disk    float64 `json:"disk"`
	MaxDisk float64 `json:"maxdisk"`
	Uptime  float64 `json:"uptime"`

	// Network and disk I/O totals, exported as counters for guests.
	NetIn     float64 `json:"netin"`
	NetOut    float64 `json:"netout"`
	DiskRead  float64 `json:"diskread"`
	DiskWrite float64 `json:"diskwrite"`

	// Guest metadata, exported as label values.
	Template int    `json:"template"`
	HAState  string `json:"hastate"`
	Tags     string `json:"tags"`
	Lock     string `json:"lock"`

	// Storage metadata.
	Storage    string `json:"storage"`
	PluginType string `json:"plugintype"`
	Content    string `json:"content"`
	Shared     int    `json:"shared"`
}
|
|
|
|
type resourceResponse struct {
|
|
Data []resourceEntry `json:"data"`
|
|
}
|
|
|
|
var (
|
|
upDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "up"),
|
|
"Whether the resource is up (1) or down (0).",
|
|
[]string{"id"},
|
|
nil,
|
|
)
|
|
cpuUsageRatioDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "cpu_usage_ratio"),
|
|
"CPU usage ratio.",
|
|
[]string{"id"},
|
|
nil,
|
|
)
|
|
cpuUsageLimitDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "cpu_usage_limit"),
|
|
"CPU usage limit (number of CPUs).",
|
|
[]string{"id"},
|
|
nil,
|
|
)
|
|
memoryUsageBytesDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "memory_usage_bytes"),
|
|
"Memory usage in bytes.",
|
|
[]string{"id"},
|
|
nil,
|
|
)
|
|
memorySizeBytesDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "memory_size_bytes"),
|
|
"Memory size in bytes.",
|
|
[]string{"id"},
|
|
nil,
|
|
)
|
|
diskUsageBytesDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "disk_usage_bytes"),
|
|
"Disk usage in bytes.",
|
|
[]string{"id"},
|
|
nil,
|
|
)
|
|
diskSizeBytesDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "disk_size_bytes"),
|
|
"Disk size in bytes.",
|
|
[]string{"id"},
|
|
nil,
|
|
)
|
|
uptimeSecondsDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "uptime_seconds"),
|
|
"Uptime in seconds.",
|
|
[]string{"id"},
|
|
nil,
|
|
)
|
|
networkTransmitBytesTotalDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "network_transmit_bytes_total"),
|
|
"Total bytes transmitted over the network.",
|
|
[]string{"id"},
|
|
nil,
|
|
)
|
|
networkReceiveBytesTotalDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "network_receive_bytes_total"),
|
|
"Total bytes received over the network.",
|
|
[]string{"id"},
|
|
nil,
|
|
)
|
|
diskWrittenBytesTotalDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "disk_written_bytes_total"),
|
|
"Total bytes written to disk.",
|
|
[]string{"id"},
|
|
nil,
|
|
)
|
|
diskReadBytesTotalDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "disk_read_bytes_total"),
|
|
"Total bytes read from disk.",
|
|
[]string{"id"},
|
|
nil,
|
|
)
|
|
guestInfoDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "guest_info"),
|
|
"Information about a guest (VM or container).",
|
|
[]string{"id", "node", "name", "type", "template", "tags"},
|
|
nil,
|
|
)
|
|
haStateDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "ha_state"),
|
|
"HA manager state of the resource.",
|
|
[]string{"id", "state"},
|
|
nil,
|
|
)
|
|
lockStateDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "lock_state"),
|
|
"Lock state of the resource.",
|
|
[]string{"id", "state"},
|
|
nil,
|
|
)
|
|
storageSharedDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "storage_shared"),
|
|
"Whether the storage is shared (1) or local (0).",
|
|
[]string{"id"},
|
|
nil,
|
|
)
|
|
storageInfoDesc = prometheus.NewDesc(
|
|
prometheus.BuildFQName(namespace, "", "storage_info"),
|
|
"Information about a storage resource.",
|
|
[]string{"id", "node", "storage", "plugintype", "content"},
|
|
nil,
|
|
)
|
|
)
|
|
|
|
func (c *clusterResourcesCollector) Update(client *Client, ch chan<- prometheus.Metric) error {
|
|
body, err := client.Get("/cluster/resources")
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get /cluster/resources: %w", err)
|
|
}
|
|
|
|
var resp resourceResponse
|
|
if err := json.Unmarshal(body, &resp); err != nil {
|
|
return fmt.Errorf("failed to parse /cluster/resources response: %w", err)
|
|
}
|
|
|
|
for _, entry := range resp.Data {
|
|
switch entry.Type {
|
|
case "node":
|
|
c.emitNode(ch, entry)
|
|
case "qemu", "lxc":
|
|
c.emitGuest(ch, entry)
|
|
case "storage":
|
|
c.emitStorage(ch, entry)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (c *clusterResourcesCollector) emitNode(ch chan<- prometheus.Metric, e resourceEntry) {
|
|
var up float64
|
|
if e.Status == "online" {
|
|
up = 1
|
|
}
|
|
ch <- prometheus.MustNewConstMetric(upDesc, prometheus.GaugeValue, up, e.ID)
|
|
ch <- prometheus.MustNewConstMetric(cpuUsageRatioDesc, prometheus.GaugeValue, e.CPU, e.ID)
|
|
ch <- prometheus.MustNewConstMetric(cpuUsageLimitDesc, prometheus.GaugeValue, e.MaxCPU, e.ID)
|
|
ch <- prometheus.MustNewConstMetric(memoryUsageBytesDesc, prometheus.GaugeValue, e.Mem, e.ID)
|
|
ch <- prometheus.MustNewConstMetric(memorySizeBytesDesc, prometheus.GaugeValue, e.MaxMem, e.ID)
|
|
ch <- prometheus.MustNewConstMetric(diskUsageBytesDesc, prometheus.GaugeValue, e.Disk, e.ID)
|
|
ch <- prometheus.MustNewConstMetric(diskSizeBytesDesc, prometheus.GaugeValue, e.MaxDisk, e.ID)
|
|
ch <- prometheus.MustNewConstMetric(uptimeSecondsDesc, prometheus.GaugeValue, e.Uptime, e.ID)
|
|
}
|
|
|
|
func (c *clusterResourcesCollector) emitGuest(ch chan<- prometheus.Metric, e resourceEntry) {
|
|
var up float64
|
|
if e.Status == "running" {
|
|
up = 1
|
|
}
|
|
ch <- prometheus.MustNewConstMetric(upDesc, prometheus.GaugeValue, up, e.ID)
|
|
ch <- prometheus.MustNewConstMetric(cpuUsageRatioDesc, prometheus.GaugeValue, e.CPU, e.ID)
|
|
ch <- prometheus.MustNewConstMetric(cpuUsageLimitDesc, prometheus.GaugeValue, e.MaxCPU, e.ID)
|
|
ch <- prometheus.MustNewConstMetric(memoryUsageBytesDesc, prometheus.GaugeValue, e.Mem, e.ID)
|
|
ch <- prometheus.MustNewConstMetric(memorySizeBytesDesc, prometheus.GaugeValue, e.MaxMem, e.ID)
|
|
ch <- prometheus.MustNewConstMetric(diskUsageBytesDesc, prometheus.GaugeValue, e.Disk, e.ID)
|
|
ch <- prometheus.MustNewConstMetric(diskSizeBytesDesc, prometheus.GaugeValue, e.MaxDisk, e.ID)
|
|
ch <- prometheus.MustNewConstMetric(uptimeSecondsDesc, prometheus.GaugeValue, e.Uptime, e.ID)
|
|
ch <- prometheus.MustNewConstMetric(networkTransmitBytesTotalDesc, prometheus.CounterValue, e.NetOut, e.ID)
|
|
ch <- prometheus.MustNewConstMetric(networkReceiveBytesTotalDesc, prometheus.CounterValue, e.NetIn, e.ID)
|
|
ch <- prometheus.MustNewConstMetric(diskWrittenBytesTotalDesc, prometheus.CounterValue, e.DiskWrite, e.ID)
|
|
ch <- prometheus.MustNewConstMetric(diskReadBytesTotalDesc, prometheus.CounterValue, e.DiskRead, e.ID)
|
|
ch <- prometheus.MustNewConstMetric(guestInfoDesc, prometheus.GaugeValue, 1,
|
|
e.ID, e.Node, e.Name, e.Type, strconv.Itoa(e.Template), e.Tags,
|
|
)
|
|
|
|
if e.HAState != "" {
|
|
ch <- prometheus.MustNewConstMetric(haStateDesc, prometheus.GaugeValue, 1, e.ID, e.HAState)
|
|
}
|
|
|
|
if e.Lock != "" {
|
|
ch <- prometheus.MustNewConstMetric(lockStateDesc, prometheus.GaugeValue, 1, e.ID, e.Lock)
|
|
}
|
|
}
|
|
|
|
func (c *clusterResourcesCollector) emitStorage(ch chan<- prometheus.Metric, e resourceEntry) {
|
|
ch <- prometheus.MustNewConstMetric(diskUsageBytesDesc, prometheus.GaugeValue, e.Disk, e.ID)
|
|
ch <- prometheus.MustNewConstMetric(diskSizeBytesDesc, prometheus.GaugeValue, e.MaxDisk, e.ID)
|
|
ch <- prometheus.MustNewConstMetric(storageSharedDesc, prometheus.GaugeValue, float64(e.Shared), e.ID)
|
|
ch <- prometheus.MustNewConstMetric(storageInfoDesc, prometheus.GaugeValue, 1,
|
|
e.ID, e.Node, e.Storage, e.PluginType, e.Content,
|
|
)
|
|
}
|