diff --git a/README.md b/README.md index ffa666f..600435e 100644 --- a/README.md +++ b/README.md @@ -165,8 +165,8 @@ Create a PVE API token with at least `PVEAuditor` role. Provide it via: | `pve_ha_node_status` | Gauge | `node`, `status` | Per-node HA status (always 1) | | `pve_ha_lrm_timestamp_seconds` | Gauge | `node` | Last LRM heartbeat as Unix timestamp | | `pve_ha_lrm_mode` | Gauge | `node`, `mode` | LRM mode per node (always 1) | -| `pve_ha_service_config` | Gauge | `sid`, `type`, `max_restart`, `max_relocate`, `failback` | Service config (always 1) | -| `pve_ha_service_status` | Gauge | `sid`, `node`, `state` | Service runtime state (always 1) | +| `pve_ha_service_config` | Gauge | `id`, `type`, `max_restart`, `max_relocate`, `failback` | Service config (always 1) | +| `pve_ha_service_status` | Gauge | `id`, `node`, `state` | Service runtime state (always 1) | ### Physical Disks diff --git a/collector/ha_status.go b/collector/ha_status.go index dae59b0..10c264d 100644 --- a/collector/ha_status.go +++ b/collector/ha_status.go @@ -5,6 +5,7 @@ import ( "fmt" "log/slog" "strconv" + "strings" "github.com/prometheus/client_golang/prometheus" ) @@ -87,12 +88,12 @@ var ( haServiceConfigDesc = prometheus.NewDesc( prometheus.BuildFQName(namespace, "ha", "service_config"), "HA service configuration.", - []string{"sid", "type", "max_restart", "max_relocate", "failback"}, nil, + []string{"id", "type", "max_restart", "max_relocate", "failback"}, nil, ) haServiceStatusDesc = prometheus.NewDesc( prometheus.BuildFQName(namespace, "ha", "service_status"), "HA service runtime status.", - []string{"sid", "node", "state"}, nil, + []string{"id", "node", "state"}, nil, ) ) @@ -132,7 +133,7 @@ func (c *haStatusCollector) Update(client *Client, ch chan<- prometheus.Metric) // Service runtime status from manager_status. 
// haSIDToID converts an HA service ID (e.g. "vm:106", "ct:200") to the
// resource ID format used by /cluster/resources (e.g. "qemu/106", "lxc/200").
//
// Only the first ':' is treated as the separator between the service type
// and the rest of the SID. The input is returned unchanged when it cannot be
// mapped: no ':' separator, nothing after the separator, or a service type
// other than "vm" or "ct".
func haSIDToID(sid string) string {
	kind, name, found := strings.Cut(sid, ":")
	// Pass malformed SIDs through untouched rather than emitting a resource
	// ID with an empty trailing component such as "qemu/".
	if !found || name == "" {
		return sid
	}
	switch kind {
	case "vm":
		return "qemu/" + name
	case "ct":
		return "lxc/" + name
	default:
		return sid
	}
}