feat: add corosync collector (quorate, nodes_total, expected_votes, node_online)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Davíð Steinn Geirsson 2026-03-20 11:32:00 +00:00
parent 63494d0fcb
commit 2a51e00fe1
3 changed files with 167 additions and 0 deletions

118
collector/corosync.go Normal file
View file

@ -0,0 +1,118 @@
package collector
import (
"encoding/json"
"fmt"
"log/slog"
"strconv"
"github.com/prometheus/client_golang/prometheus"
)
func init() {
registerCollector("corosync", func(logger *slog.Logger) Collector {
return newCorosyncCollector(logger)
})
}
// corosyncCollector emits cluster quorum, node count, expected votes and
// per-node online metrics via its Update method. Its only state is a logger.
type corosyncCollector struct {
// logger receives a warning when a node's quorum_votes value cannot be parsed.
logger *slog.Logger
}
// newCorosyncCollector returns a corosyncCollector that reports warnings
// through logger.
func newCorosyncCollector(logger *slog.Logger) *corosyncCollector {
	cc := new(corosyncCollector)
	cc.logger = logger
	return cc
}
// clusterConfigNodesResponse is the JSON envelope that Update decodes from the
// /cluster/config/nodes endpoint.
type clusterConfigNodesResponse struct {
// Data holds the node entries found under the top-level "data" key.
Data []clusterConfigNodeEntry `json:"data"`
}
// clusterConfigNodeEntry is a single node entry of the /cluster/config/nodes
// response. Every field is decoded as a JSON string, including the
// numeric-looking ones.
type clusterConfigNodeEntry struct {
// Node is the node name; Update uses it as log context when QuorumVotes
// cannot be parsed.
Node string `json:"node"`
// NodeID is the "nodeid" value. It is decoded but not read by Update.
NodeID string `json:"nodeid"`
// QuorumVotes is the node's vote count as text; Update parses it with
// strconv.ParseFloat and sums it into the expected-votes metric.
QuorumVotes string `json:"quorum_votes"`
}
// Metric descriptors for the corosync collector. All of them are emitted as
// gauges by Update; the fully qualified names are built from the package-level
// namespace.
var (
// clusterQuorateDesc describes <namespace>_cluster_quorate, a label-less
// metric carrying the quorate value of the "cluster" status entry.
clusterQuorateDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "cluster", "quorate"),
"Whether the cluster is quorate.",
nil,
nil,
)
// clusterNodesTotalDesc describes <namespace>_cluster_nodes_total, a
// label-less metric carrying the number of "node" status entries.
clusterNodesTotalDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "cluster", "nodes_total"),
"Total number of nodes in the cluster.",
nil,
nil,
)
// clusterExpectedVotesDesc describes <namespace>_cluster_expected_votes, a
// label-less metric carrying the sum of the configured quorum votes.
clusterExpectedVotesDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "cluster", "expected_votes"),
"Total expected votes in the cluster.",
nil,
nil,
)
// nodeOnlineDesc describes <namespace>_node_online. It has no subsystem
// component and carries two variable labels, "name" and "nodeid", in that
// order; label values must be supplied in the same order.
nodeOnlineDesc = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "node_online"),
"Whether a node is online.",
[]string{"name", "nodeid"},
nil,
)
)
// Update queries the cluster API through client and sends the corosync-related
// gauges on ch.
//
// From /cluster/status it emits the quorate gauge for every entry of type
// "cluster", an online gauge (labelled with name and nodeid) for every entry
// of type "node", and afterwards the number of "node" entries seen. From
// /cluster/config/nodes it emits the sum of all parseable quorum_votes values
// as the expected-votes gauge; an entry whose quorum_votes cannot be parsed is
// logged at warn level and left out of the sum.
//
// A failed request or JSON decode is returned as a wrapped error. Metrics that
// were already sent before the failure remain on ch, so a failing
// /cluster/config/nodes call still leaves the /cluster/status metrics emitted.
func (c *corosyncCollector) Update(client *Client, ch chan<- prometheus.Metric) error {
	// Every metric produced here is a gauge; emit keeps the call sites short.
	emit := func(desc *prometheus.Desc, value float64, labelValues ...string) {
		ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, value, labelValues...)
	}

	// Cluster status: quorate flag, per-node online state and node count.
	rawStatus, err := client.Get("/cluster/status")
	if err != nil {
		return fmt.Errorf("failed to get /cluster/status: %w", err)
	}

	var status clusterStatusResponse
	if decodeErr := json.Unmarshal(rawStatus, &status); decodeErr != nil {
		return fmt.Errorf("failed to parse /cluster/status response: %w", decodeErr)
	}

	nodes := 0
	for _, item := range status.Data {
		switch item.Type {
		case "cluster":
			emit(clusterQuorateDesc, float64(item.Quorate))
		case "node":
			nodes++
			emit(nodeOnlineDesc, float64(item.Online), item.Name, strconv.Itoa(item.NodeID))
		}
	}
	emit(clusterNodesTotalDesc, float64(nodes))

	// Cluster node configuration: expected votes are the sum of quorum_votes.
	rawConfig, err := client.Get("/cluster/config/nodes")
	if err != nil {
		return fmt.Errorf("failed to get /cluster/config/nodes: %w", err)
	}

	var nodeConfig clusterConfigNodesResponse
	if decodeErr := json.Unmarshal(rawConfig, &nodeConfig); decodeErr != nil {
		return fmt.Errorf("failed to parse /cluster/config/nodes response: %w", decodeErr)
	}

	var voteSum float64
	for _, member := range nodeConfig.Data {
		parsed, parseErr := strconv.ParseFloat(member.QuorumVotes, 64)
		if parseErr != nil {
			// Best effort: skip this node's votes rather than failing the scrape.
			c.logger.Warn("failed to parse quorum_votes", "node", member.Node, "err", parseErr)
			continue
		}
		voteSum += parsed
	}
	emit(clusterExpectedVotesDesc, voteSum)

	return nil
}

View file

@ -0,0 +1,48 @@
package collector
import (
"log/slog"
"strings"
"testing"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/testutil"
)
// TestCorosyncCollector runs the corosync collector against canned responses
// for /cluster/status and /cluster/config/nodes and compares the gathered
// metrics with the expected text exposition below.
func TestCorosyncCollector(t *testing.T) {
// Map each API path the collector requests to the fixture name handed to
// newTestClient.
client := newTestClient(t, map[string]string{
"/cluster/status": "cluster_status.json",
"/cluster/config/nodes": "cluster_config_nodes.json",
})
collector := newCorosyncCollector(slog.Default())
// Wrap the collector in testCollectorAdapter so it can be registered with a
// Prometheus registry.
adapter := &testCollectorAdapter{client: client, collector: collector}
reg := prometheus.NewRegistry()
reg.MustRegister(adapter)
// Expected output: five nodes, all online, one vote each, cluster quorate.
expected := `
# HELP pve_cluster_expected_votes Total expected votes in the cluster.
# TYPE pve_cluster_expected_votes gauge
pve_cluster_expected_votes 5
# HELP pve_cluster_nodes_total Total number of nodes in the cluster.
# TYPE pve_cluster_nodes_total gauge
pve_cluster_nodes_total 5
# HELP pve_cluster_quorate Whether the cluster is quorate.
# TYPE pve_cluster_quorate gauge
pve_cluster_quorate 1
# HELP pve_node_online Whether a node is online.
# TYPE pve_node_online gauge
pve_node_online{name="node01",nodeid="1"} 1
pve_node_online{name="node02",nodeid="2"} 1
pve_node_online{name="node03",nodeid="3"} 1
pve_node_online{name="node04",nodeid="4"} 1
pve_node_online{name="node05",nodeid="5"} 1
`
// Restrict the comparison to the four metric names this collector emits.
if err := testutil.GatherAndCompare(reg, strings.NewReader(expected),
"pve_cluster_quorate", "pve_cluster_nodes_total", "pve_cluster_expected_votes", "pve_node_online",
); err != nil {
t.Errorf("unexpected metrics: %s", err)
}
}

View file

@ -0,0 +1 @@
{"data":[{"node":"node01","nodeid":"1","quorum_votes":"1"},{"node":"node02","nodeid":"2","quorum_votes":"1"},{"node":"node03","nodeid":"3","quorum_votes":"1"},{"node":"node04","nodeid":"4","quorum_votes":"1"},{"node":"node05","nodeid":"5","quorum_votes":"1"}]}