From 18bb43394e378516ee8f4833a7b2ecf2f46c7dfa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Dav=C3=AD=C3=B0=20Steinn=20Geirsson?= <david@dsg.is>
Date: Fri, 20 Mar 2026 15:20:17 +0000
Subject: [PATCH] docs: add implementation plan for remaining collectors

9 tasks covering node_status, vm_pressure, ha_status, and physical_disk
collectors with TDD approach, fixtures, and README update.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .../2026-03-20-remaining-collectors-plan.md   | 1217 +++++++++++++++++
 1 file changed, 1217 insertions(+)
 create mode 100644 docs/superpowers/plans/2026-03-20-remaining-collectors-plan.md

diff --git a/docs/superpowers/plans/2026-03-20-remaining-collectors-plan.md b/docs/superpowers/plans/2026-03-20-remaining-collectors-plan.md
new file mode 100644
index 0000000..6702efc
--- /dev/null
+++ b/docs/superpowers/plans/2026-03-20-remaining-collectors-plan.md
@@ -0,0 +1,1217 @@
+# Remaining Collectors Implementation Plan
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Add 4 new collectors (node_status, vm_pressure, ha_status, physical_disk) covering all remaining TODO metrics from the README.
+
+**Architecture:** Each collector is a standalone file following the existing pattern: init() self-registration, NodeAwareCollector or plain Collector interface, per-node fan-out with semaphore-bounded concurrency. Tests use JSON fixtures and testutil.GatherAndCompare.
+
+**Tech Stack:** Go, prometheus/client_golang, slog
+
+**Spec:** `docs/superpowers/specs/2026-03-20-remaining-collectors-design.md`
+
+---
+
+### Task 1: Node Status Collector — Test + Fixture
+
+**Files:**
+- Create: `collector/fixtures/node_status.json`
+- Create: `collector/node_status_test.go`
+
+- [ ] **Step 1: Create the test fixture**
+
+Create `collector/fixtures/node_status.json`:
+
+```json
+{"data":{"loadavg":["3.12","2.88","2.79"],"swap":{"total":8589930496,"used":1048576,"free":8588881920},"rootfs":{"used":28747304960,"total":100861726720,"avail":66943684608},"ksm":{"shared":4096},"boot-info":{"mode":"efi","secureboot":0}}}
+```
+
+- [ ] **Step 2: Write the test**
+
+Create `collector/node_status_test.go`:
+
+```go
+package collector
+
+import (
+	"log/slog"
+	"strings"
+	"testing"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/testutil"
+)
+
+func TestNodeStatusCollector(t *testing.T) {
+	client := newTestClient(t, map[string]string{
+		"/nodes/node01/status": "node_status.json",
+	})
+
+	collector := newNodeStatusCollector(slog.Default())
+	collector.SetNodes([]string{"node01"})
+	adapter := &testCollectorAdapter{client: client, collector: collector}
+
+	reg := prometheus.NewRegistry()
+	reg.MustRegister(adapter)
+
+	expected := `
+# HELP pve_node_boot_mode_info Node boot mode information.
+# TYPE pve_node_boot_mode_info gauge
+pve_node_boot_mode_info{mode="efi",node="node01",secureboot="0"} 1
+# HELP pve_node_ksm_shared_bytes KSM shared memory in bytes.
+# TYPE pve_node_ksm_shared_bytes gauge
+pve_node_ksm_shared_bytes{node="node01"} 4096
+# HELP pve_node_load1 1-minute load average.
+# TYPE pve_node_load1 gauge
+pve_node_load1{node="node01"} 3.12
+# HELP pve_node_load15 15-minute load average.
+# TYPE pve_node_load15 gauge
+pve_node_load15{node="node01"} 2.79
+# HELP pve_node_load5 5-minute load average.
+# TYPE pve_node_load5 gauge
+pve_node_load5{node="node01"} 2.88
+# HELP pve_node_rootfs_available_bytes Root filesystem available space in bytes.
+# TYPE pve_node_rootfs_available_bytes gauge
+pve_node_rootfs_available_bytes{node="node01"} 6.6943684608e+10
+# HELP pve_node_rootfs_total_bytes Root filesystem total size in bytes.
+# TYPE pve_node_rootfs_total_bytes gauge
+pve_node_rootfs_total_bytes{node="node01"} 1.0086172672e+11
+# HELP pve_node_rootfs_used_bytes Root filesystem used space in bytes.
+# TYPE pve_node_rootfs_used_bytes gauge
+pve_node_rootfs_used_bytes{node="node01"} 2.874730496e+10
+# HELP pve_node_swap_free_bytes Free swap in bytes.
+# TYPE pve_node_swap_free_bytes gauge
+pve_node_swap_free_bytes{node="node01"} 8.58888192e+09
+# HELP pve_node_swap_total_bytes Total swap in bytes.
+# TYPE pve_node_swap_total_bytes gauge
+pve_node_swap_total_bytes{node="node01"} 8.589930496e+09
+# HELP pve_node_swap_used_bytes Used swap in bytes.
+# TYPE pve_node_swap_used_bytes gauge
+pve_node_swap_used_bytes{node="node01"} 1.048576e+06
+`
+
+	if err := testutil.GatherAndCompare(reg, strings.NewReader(expected),
+		"pve_node_load1", "pve_node_load5", "pve_node_load15",
+		"pve_node_swap_total_bytes", "pve_node_swap_used_bytes", "pve_node_swap_free_bytes",
+		"pve_node_rootfs_total_bytes", "pve_node_rootfs_used_bytes", "pve_node_rootfs_available_bytes",
+		"pve_node_ksm_shared_bytes", "pve_node_boot_mode_info",
+	); err != nil {
+		t.Errorf("unexpected metrics: %s", err)
+	}
+}
+```
+
+- [ ] **Step 3: Run test to verify it fails**
+
+Run: `cd /home/user/git/pve-exporter && go test ./collector/ -run TestNodeStatusCollector -v`
+Expected: FAIL — `newNodeStatusCollector` not defined.
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add collector/fixtures/node_status.json collector/node_status_test.go
+git commit -m "test: add node_status collector test and fixture"
+```
+
+---
+
+### Task 2: Node Status Collector — Implementation
+
+**Files:**
+- Create: `collector/node_status.go`
+
+- [ ] **Step 1: Implement the collector**
+
+Create `collector/node_status.go`:
+
+```go
+package collector
+
+import (
+	"encoding/json"
+	"fmt"
+	"log/slog"
+	"strconv"
+	"sync"
+
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+func init() {
+	registerCollector("node_status", func(logger *slog.Logger) Collector {
+		return newNodeStatusCollector(logger)
+	})
+}
+
+type nodeStatusCollector struct {
+	logger *slog.Logger
+	mu     sync.Mutex
+	nodes  []string
+}
+
+func newNodeStatusCollector(logger *slog.Logger) *nodeStatusCollector {
+	return &nodeStatusCollector{logger: logger}
+}
+
+func (c *nodeStatusCollector) SetNodes(nodes []string) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	c.nodes = nodes
+}
+
+type nodeStatusResponse struct {
+	Data nodeStatusData `json:"data"`
+}
+
+type nodeStatusData struct {
+	LoadAvg  []string       `json:"loadavg"`
+	Swap     nodeStatusMem  `json:"swap"`
+	RootFS   nodeStatusFS   `json:"rootfs"`
+	KSM      nodeStatusKSM  `json:"ksm"`
+	BootInfo nodeStatusBoot `json:"boot-info"`
+}
+
+type nodeStatusMem struct {
+	Total float64 `json:"total"`
+	Used  float64 `json:"used"`
+	Free  float64 `json:"free"`
+}
+
+type nodeStatusFS struct {
+	Total float64 `json:"total"`
+	Used  float64 `json:"used"`
+	Avail float64 `json:"avail"`
+}
+
+type nodeStatusKSM struct {
+	Shared float64 `json:"shared"`
+}
+
+type nodeStatusBoot struct {
+	Mode       string `json:"mode"`
+	SecureBoot int    `json:"secureboot"`
+}
+
+var (
+	nodeLoad1Desc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "node", "load1"),
+		"1-minute load average.",
+		[]string{"node"}, nil,
+	)
+	nodeLoad5Desc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "node", "load5"),
+		"5-minute load average.",
+		[]string{"node"}, nil,
+	)
+	nodeLoad15Desc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "node", "load15"),
+		"15-minute load average.",
+		[]string{"node"}, nil,
+	)
+	nodeSwapTotalDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "node", "swap_total_bytes"),
+		"Total swap in bytes.",
+		[]string{"node"}, nil,
+	)
+	nodeSwapUsedDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "node", "swap_used_bytes"),
+		"Used swap in bytes.",
+		[]string{"node"}, nil,
+	)
+	nodeSwapFreeDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "node", "swap_free_bytes"),
+		"Free swap in bytes.",
+		[]string{"node"}, nil,
+	)
+	nodeRootfsTotalDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "node", "rootfs_total_bytes"),
+		"Root filesystem total size in bytes.",
+		[]string{"node"}, nil,
+	)
+	nodeRootfsUsedDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "node", "rootfs_used_bytes"),
+		"Root filesystem used space in bytes.",
+		[]string{"node"}, nil,
+	)
+	nodeRootfsAvailDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "node", "rootfs_available_bytes"),
+		"Root filesystem available space in bytes.",
+		[]string{"node"}, nil,
+	)
+	nodeKSMSharedDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "node", "ksm_shared_bytes"),
+		"KSM shared memory in bytes.",
+		[]string{"node"}, nil,
+	)
+	nodeBootModeDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "node", "boot_mode_info"),
+		"Node boot mode information.",
+		[]string{"node", "mode", "secureboot"}, nil,
+	)
+)
+
+func (c *nodeStatusCollector) Update(client *Client, ch chan<- prometheus.Metric) error {
+	c.mu.Lock()
+	nodes := make([]string, len(c.nodes))
+	copy(nodes, c.nodes)
+	c.mu.Unlock()
+
+	var (
+		wg   sync.WaitGroup
+		errs []error
+		emu  sync.Mutex
+	)
+
+	sem := make(chan struct{}, client.MaxConcurrent())
+
+	for _, node := range nodes {
+		wg.Add(1)
+		go func(node string) {
+			defer wg.Done()
+			sem <- struct{}{}
+			defer func() { <-sem }()
+
+			if err := c.collectNode(client, ch, node); err != nil {
+				emu.Lock()
+				errs = append(errs, err)
+				emu.Unlock()
+			}
+		}(node)
+	}
+	wg.Wait()
+
+	if len(errs) > 0 {
+		return fmt.Errorf("node_status collection errors: %v", errs)
+	}
+	return nil
+}
+
+func (c *nodeStatusCollector) collectNode(client *Client, ch chan<- prometheus.Metric, node string) error {
+	body, err := client.Get(fmt.Sprintf("/nodes/%s/status", node))
+	if err != nil {
+		return fmt.Errorf("failed to get status for node %s: %w", node, err)
+	}
+
+	var resp nodeStatusResponse
+	if err := json.Unmarshal(body, &resp); err != nil {
+		return fmt.Errorf("failed to parse status response for node %s: %w", node, err)
+	}
+
+	d := resp.Data
+
+	// Load averages (strings in API).
+	if len(d.LoadAvg) >= 3 {
+		for i, desc := range []*prometheus.Desc{nodeLoad1Desc, nodeLoad5Desc, nodeLoad15Desc} {
+			val, err := strconv.ParseFloat(d.LoadAvg[i], 64)
+			if err != nil {
+				c.logger.Warn("failed to parse load average", "node", node, "index", i, "err", err)
+				continue
+			}
+			ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, val, node)
+		}
+	}
+
+	// Swap.
+	ch <- prometheus.MustNewConstMetric(nodeSwapTotalDesc, prometheus.GaugeValue, d.Swap.Total, node)
+	ch <- prometheus.MustNewConstMetric(nodeSwapUsedDesc, prometheus.GaugeValue, d.Swap.Used, node)
+	ch <- prometheus.MustNewConstMetric(nodeSwapFreeDesc, prometheus.GaugeValue, d.Swap.Free, node)
+
+	// Root filesystem.
+	ch <- prometheus.MustNewConstMetric(nodeRootfsTotalDesc, prometheus.GaugeValue, d.RootFS.Total, node)
+	ch <- prometheus.MustNewConstMetric(nodeRootfsUsedDesc, prometheus.GaugeValue, d.RootFS.Used, node)
+	ch <- prometheus.MustNewConstMetric(nodeRootfsAvailDesc, prometheus.GaugeValue, d.RootFS.Avail, node)
+
+	// KSM.
+	ch <- prometheus.MustNewConstMetric(nodeKSMSharedDesc, prometheus.GaugeValue, d.KSM.Shared, node)
+
+	// Boot mode info.
+	secureboot := strconv.Itoa(d.BootInfo.SecureBoot)
+	ch <- prometheus.MustNewConstMetric(nodeBootModeDesc, prometheus.GaugeValue, 1, node, d.BootInfo.Mode, secureboot)
+
+	return nil
+}
+```
+
+- [ ] **Step 2: Run tests**
+
+Run: `cd /home/user/git/pve-exporter && go test ./collector/ -run TestNodeStatusCollector -v`
+Expected: PASS
+
+- [ ] **Step 3: Run all tests to check for regressions**
+
+Run: `cd /home/user/git/pve-exporter && go test ./collector/ -v`
+Expected: All tests PASS
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add collector/node_status.go
+git commit -m "feat: add node_status collector (load, swap, rootfs, ksm, boot mode)"
+```
+
+---
+
+### Task 3: VM Pressure Collector — Test + Fixture
+
+**Files:**
+- Create: `collector/fixtures/node_qemu_pressure.json`
+- Create: `collector/vm_pressure_test.go`
+
+- [ ] **Step 1: Create the test fixture**
+
+Create `collector/fixtures/node_qemu_pressure.json` with one running VM (has pressure fields) and one stopped VM (no pressure fields):
+
+```json
+{"data":[{"vmid":112,"status":"running","name":"karmada","cpus":4,"pressurecpusome":1.5,"pressurecpufull":0.3,"pressurememorysome":2.1,"pressurememoryfull":0.0,"pressureiosome":0.8,"pressureiofull":0.1,"mem":6287664128,"maxmem":8589934592},{"vmid":104,"status":"stopped","name":"test3","cpus":2,"mem":0,"maxmem":4294967296}]}
+```
+
+- [ ] **Step 2: Write the test**
+
+Create `collector/vm_pressure_test.go`:
+
+```go
+package collector
+
+import (
+	"log/slog"
+	"strings"
+	"testing"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/testutil"
+)
+
+func TestVMPressureCollector(t *testing.T) {
+	client := newTestClient(t, map[string]string{
+		"/nodes/node01/qemu": "node_qemu_pressure.json",
+	})
+
+	collector := newVMPressureCollector(slog.Default())
+	collector.SetNodes([]string{"node01"})
+	adapter := &testCollectorAdapter{client: client, collector: collector}
+
+	reg := prometheus.NewRegistry()
+	reg.MustRegister(adapter)
+
+	expected := `
+# HELP pve_vm_pressure_cpu_full_ratio CPU pressure (full) ratio.
+# TYPE pve_vm_pressure_cpu_full_ratio gauge
+pve_vm_pressure_cpu_full_ratio{id="qemu/112",node="node01"} 0.003
+# HELP pve_vm_pressure_cpu_some_ratio CPU pressure (some) ratio.
+# TYPE pve_vm_pressure_cpu_some_ratio gauge
+pve_vm_pressure_cpu_some_ratio{id="qemu/112",node="node01"} 0.015
+# HELP pve_vm_pressure_io_full_ratio I/O pressure (full) ratio.
+# TYPE pve_vm_pressure_io_full_ratio gauge
+pve_vm_pressure_io_full_ratio{id="qemu/112",node="node01"} 0.001
+# HELP pve_vm_pressure_io_some_ratio I/O pressure (some) ratio.
+# TYPE pve_vm_pressure_io_some_ratio gauge
+pve_vm_pressure_io_some_ratio{id="qemu/112",node="node01"} 0.008
+# HELP pve_vm_pressure_memory_full_ratio Memory pressure (full) ratio.
+# TYPE pve_vm_pressure_memory_full_ratio gauge
+pve_vm_pressure_memory_full_ratio{id="qemu/112",node="node01"} 0
+# HELP pve_vm_pressure_memory_some_ratio Memory pressure (some) ratio.
+# TYPE pve_vm_pressure_memory_some_ratio gauge
+pve_vm_pressure_memory_some_ratio{id="qemu/112",node="node01"} 0.021
+`
+
+	if err := testutil.GatherAndCompare(reg, strings.NewReader(expected),
+		"pve_vm_pressure_cpu_some_ratio", "pve_vm_pressure_cpu_full_ratio",
+		"pve_vm_pressure_memory_some_ratio", "pve_vm_pressure_memory_full_ratio",
+		"pve_vm_pressure_io_some_ratio", "pve_vm_pressure_io_full_ratio",
+	); err != nil {
+		t.Errorf("unexpected metrics: %s", err)
+	}
+}
+```
+
+- [ ] **Step 3: Run test to verify it fails**
+
+Run: `cd /home/user/git/pve-exporter && go test ./collector/ -run TestVMPressureCollector -v`
+Expected: FAIL — `newVMPressureCollector` not defined.
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add collector/fixtures/node_qemu_pressure.json collector/vm_pressure_test.go
+git commit -m "test: add vm_pressure collector test and fixture"
+```
+
+---
+
+### Task 4: VM Pressure Collector — Implementation
+
+**Files:**
+- Create: `collector/vm_pressure.go`
+
+- [ ] **Step 1: Implement the collector**
+
+Create `collector/vm_pressure.go`:
+
+```go
+package collector
+
+import (
+	"encoding/json"
+	"fmt"
+	"log/slog"
+	"sync"
+
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+func init() {
+	registerCollector("vm_pressure", func(logger *slog.Logger) Collector {
+		return newVMPressureCollector(logger)
+	})
+}
+
+type vmPressureCollector struct {
+	logger *slog.Logger
+	mu     sync.Mutex
+	nodes  []string
+}
+
+func newVMPressureCollector(logger *slog.Logger) *vmPressureCollector {
+	return &vmPressureCollector{logger: logger}
+}
+
+func (c *vmPressureCollector) SetNodes(nodes []string) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	c.nodes = nodes
+}
+
+type vmPressureResponse struct {
+	Data []vmPressureEntry `json:"data"`
+}
+
+type vmPressureEntry struct {
+	VMID               int     `json:"vmid"`
+	Status             string  `json:"status"`
+	PressureCPUSome    float64 `json:"pressurecpusome"`
+	PressureCPUFull    float64 `json:"pressurecpufull"`
+	PressureMemSome    float64 `json:"pressurememorysome"`
+	PressureMemFull    float64 `json:"pressurememoryfull"`
+	PressureIOSome     float64 `json:"pressureiosome"`
+	PressureIOFull     float64 `json:"pressureiofull"`
+}
+
+var (
+	vmPressureCPUSomeDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "vm_pressure", "cpu_some_ratio"),
+		"CPU pressure (some) ratio.",
+		[]string{"id", "node"}, nil,
+	)
+	vmPressureCPUFullDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "vm_pressure", "cpu_full_ratio"),
+		"CPU pressure (full) ratio.",
+		[]string{"id", "node"}, nil,
+	)
+	vmPressureMemSomeDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "vm_pressure", "memory_some_ratio"),
+		"Memory pressure (some) ratio.",
+		[]string{"id", "node"}, nil,
+	)
+	vmPressureMemFullDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "vm_pressure", "memory_full_ratio"),
+		"Memory pressure (full) ratio.",
+		[]string{"id", "node"}, nil,
+	)
+	vmPressureIOSomeDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "vm_pressure", "io_some_ratio"),
+		"I/O pressure (some) ratio.",
+		[]string{"id", "node"}, nil,
+	)
+	vmPressureIOFullDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "vm_pressure", "io_full_ratio"),
+		"I/O pressure (full) ratio.",
+		[]string{"id", "node"}, nil,
+	)
+)
+
+func (c *vmPressureCollector) Update(client *Client, ch chan<- prometheus.Metric) error {
+	c.mu.Lock()
+	nodes := make([]string, len(c.nodes))
+	copy(nodes, c.nodes)
+	c.mu.Unlock()
+
+	var (
+		wg   sync.WaitGroup
+		errs []error
+		emu  sync.Mutex
+	)
+
+	sem := make(chan struct{}, client.MaxConcurrent())
+
+	for _, node := range nodes {
+		wg.Add(1)
+		go func(node string) {
+			defer wg.Done()
+			sem <- struct{}{}
+			defer func() { <-sem }()
+
+			if err := c.collectNode(client, ch, node); err != nil {
+				emu.Lock()
+				errs = append(errs, err)
+				emu.Unlock()
+			}
+		}(node)
+	}
+	wg.Wait()
+
+	if len(errs) > 0 {
+		return fmt.Errorf("vm_pressure collection errors: %v", errs)
+	}
+	return nil
+}
+
+func (c *vmPressureCollector) collectNode(client *Client, ch chan<- prometheus.Metric, node string) error {
+	body, err := client.Get(fmt.Sprintf("/nodes/%s/qemu", node))
+	if err != nil {
+		return fmt.Errorf("failed to get qemu list for node %s: %w", node, err)
+	}
+
+	var resp vmPressureResponse
+	if err := json.Unmarshal(body, &resp); err != nil {
+		return fmt.Errorf("failed to parse qemu list for node %s: %w", node, err)
+	}
+
+	for _, vm := range resp.Data {
+		if vm.Status != "running" {
+			continue
+		}
+
+		id := fmt.Sprintf("qemu/%d", vm.VMID)
+
+		ch <- prometheus.MustNewConstMetric(vmPressureCPUSomeDesc, prometheus.GaugeValue, vm.PressureCPUSome/100, id, node)
+		ch <- prometheus.MustNewConstMetric(vmPressureCPUFullDesc, prometheus.GaugeValue, vm.PressureCPUFull/100, id, node)
+		ch <- prometheus.MustNewConstMetric(vmPressureMemSomeDesc, prometheus.GaugeValue, vm.PressureMemSome/100, id, node)
+		ch <- prometheus.MustNewConstMetric(vmPressureMemFullDesc, prometheus.GaugeValue, vm.PressureMemFull/100, id, node)
+		ch <- prometheus.MustNewConstMetric(vmPressureIOSomeDesc, prometheus.GaugeValue, vm.PressureIOSome/100, id, node)
+		ch <- prometheus.MustNewConstMetric(vmPressureIOFullDesc, prometheus.GaugeValue, vm.PressureIOFull/100, id, node)
+	}
+
+	return nil
+}
+```
+
+- [ ] **Step 2: Run tests**
+
+Run: `cd /home/user/git/pve-exporter && go test ./collector/ -run TestVMPressureCollector -v`
+Expected: PASS
+
+- [ ] **Step 3: Run all tests**
+
+Run: `cd /home/user/git/pve-exporter && go test ./collector/ -v`
+Expected: All tests PASS
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add collector/vm_pressure.go
+git commit -m "feat: add vm_pressure collector (PSI cpu/memory/io for QEMU VMs)"
+```
+
+---
+
+### Task 5: HA Status Collector — Test + Fixtures
+
+**Files:**
+- Create: `collector/fixtures/ha_manager_status.json`
+- Create: `collector/fixtures/ha_resources.json`
+- Create: `collector/ha_status_test.go`
+
+- [ ] **Step 1: Create the manager_status fixture**
+
+Create `collector/fixtures/ha_manager_status.json`:
+
+```json
+{"data":{"manager_status":{"master_node":"node03","node_status":{"node01":"online","node02":"online","node03":"online"},"service_status":{"vm:106":{"node":"node01","running":1,"state":"started"}}},"lrm_status":{"node01":{"mode":"active","state":"active","timestamp":1774016350},"node02":{"mode":"active","state":"wait_for_agent_lock","timestamp":1774016351},"node03":{"mode":"active","state":"wait_for_agent_lock","timestamp":1774016351}}}}
+```
+
+- [ ] **Step 2: Create the ha_resources fixture**
+
+Create `collector/fixtures/ha_resources.json`:
+
+```json
+{"data":[{"sid":"vm:106","type":"vm","state":"started","max_restart":2,"max_relocate":2,"failback":1}]}
+```
+
+- [ ] **Step 3: Write the test**
+
+Create `collector/ha_status_test.go`:
+
+```go
+package collector
+
+import (
+	"log/slog"
+	"strings"
+	"testing"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/testutil"
+)
+
+func TestHAStatusCollector(t *testing.T) {
+	client := newTestClient(t, map[string]string{
+		"/cluster/ha/status/manager_status": "ha_manager_status.json",
+		"/cluster/ha/resources":             "ha_resources.json",
+	})
+
+	collector := newHAStatusCollector(slog.Default())
+	adapter := &testCollectorAdapter{client: client, collector: collector}
+
+	reg := prometheus.NewRegistry()
+	reg.MustRegister(adapter)
+
+	expected := `
+# HELP pve_ha_crm_master Whether a node is the CRM master.
+# TYPE pve_ha_crm_master gauge
+pve_ha_crm_master{node="node01"} 0
+pve_ha_crm_master{node="node02"} 0
+pve_ha_crm_master{node="node03"} 1
+# HELP pve_ha_lrm_mode LRM mode for a node.
+# TYPE pve_ha_lrm_mode gauge
+pve_ha_lrm_mode{mode="active",node="node01"} 1
+pve_ha_lrm_mode{mode="active",node="node02"} 1
+pve_ha_lrm_mode{mode="active",node="node03"} 1
+# HELP pve_ha_lrm_timestamp_seconds Last LRM heartbeat as Unix timestamp.
+# TYPE pve_ha_lrm_timestamp_seconds gauge
+pve_ha_lrm_timestamp_seconds{node="node01"} 1.77401635e+09
+pve_ha_lrm_timestamp_seconds{node="node02"} 1.774016351e+09
+pve_ha_lrm_timestamp_seconds{node="node03"} 1.774016351e+09
+# HELP pve_ha_node_status HA node status.
+# TYPE pve_ha_node_status gauge
+pve_ha_node_status{node="node01",status="online"} 1
+pve_ha_node_status{node="node02",status="online"} 1
+pve_ha_node_status{node="node03",status="online"} 1
+# HELP pve_ha_service_config HA service configuration.
+# TYPE pve_ha_service_config gauge
+pve_ha_service_config{failback="1",max_relocate="2",max_restart="2",sid="vm:106",type="vm"} 1
+# HELP pve_ha_service_status HA service runtime status.
+# TYPE pve_ha_service_status gauge
+pve_ha_service_status{node="node01",sid="vm:106",state="started"} 1
+`
+
+	if err := testutil.GatherAndCompare(reg, strings.NewReader(expected),
+		"pve_ha_crm_master", "pve_ha_node_status",
+		"pve_ha_lrm_timestamp_seconds", "pve_ha_lrm_mode",
+		"pve_ha_service_config", "pve_ha_service_status",
+	); err != nil {
+		t.Errorf("unexpected metrics: %s", err)
+	}
+}
+```
+
+- [ ] **Step 4: Run test to verify it fails**
+
+Run: `cd /home/user/git/pve-exporter && go test ./collector/ -run TestHAStatusCollector -v`
+Expected: FAIL — `newHAStatusCollector` not defined.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add collector/fixtures/ha_manager_status.json collector/fixtures/ha_resources.json collector/ha_status_test.go
+git commit -m "test: add ha_status collector test and fixtures"
+```
+
+---
+
+### Task 6: HA Status Collector — Implementation
+
+**Files:**
+- Create: `collector/ha_status.go`
+
+- [ ] **Step 1: Implement the collector**
+
+Create `collector/ha_status.go`:
+
+```go
+package collector
+
+import (
+	"encoding/json"
+	"fmt"
+	"log/slog"
+	"strconv"
+
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+func init() {
+	registerCollector("ha_status", func(logger *slog.Logger) Collector {
+		return newHAStatusCollector(logger)
+	})
+}
+
+type haStatusCollector struct {
+	logger *slog.Logger
+}
+
+func newHAStatusCollector(logger *slog.Logger) *haStatusCollector {
+	return &haStatusCollector{logger: logger}
+}
+
+type haManagerStatusResponse struct {
+	Data haManagerStatusData `json:"data"`
+}
+
+type haManagerStatusData struct {
+	ManagerStatus haManagerStatus        `json:"manager_status"`
+	LRMStatus     map[string]haLRMEntry  `json:"lrm_status"`
+}
+
+type haManagerStatus struct {
+	MasterNode    string                       `json:"master_node"`
+	NodeStatus    map[string]string            `json:"node_status"`
+	ServiceStatus map[string]haServiceRuntime  `json:"service_status"`
+}
+
+type haLRMEntry struct {
+	Mode      string  `json:"mode"`
+	State     string  `json:"state"`
+	Timestamp float64 `json:"timestamp"`
+}
+
+type haServiceRuntime struct {
+	Node    string `json:"node"`
+	Running int    `json:"running"`
+	State   string `json:"state"`
+}
+
+type haResourcesResponse struct {
+	Data []haResourceEntry `json:"data"`
+}
+
+type haResourceEntry struct {
+	SID          string `json:"sid"`
+	Type         string `json:"type"`
+	State        string `json:"state"`
+	MaxRestart   int    `json:"max_restart"`
+	MaxRelocate  int    `json:"max_relocate"`
+	Failback     int    `json:"failback"`
+}
+
+var (
+	haCRMMasterDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "ha", "crm_master"),
+		"Whether a node is the CRM master.",
+		[]string{"node"}, nil,
+	)
+	haNodeStatusDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "ha", "node_status"),
+		"HA node status.",
+		[]string{"node", "status"}, nil,
+	)
+	haLRMTimestampDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "ha", "lrm_timestamp_seconds"),
+		"Last LRM heartbeat as Unix timestamp.",
+		[]string{"node"}, nil,
+	)
+	haLRMModeDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "ha", "lrm_mode"),
+		"LRM mode for a node.",
+		[]string{"node", "mode"}, nil,
+	)
+	haServiceConfigDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "ha", "service_config"),
+		"HA service configuration.",
+		[]string{"sid", "type", "max_restart", "max_relocate", "failback"}, nil,
+	)
+	haServiceStatusDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "ha", "service_status"),
+		"HA service runtime status.",
+		[]string{"sid", "node", "state"}, nil,
+	)
+)
+
+func (c *haStatusCollector) Update(client *Client, ch chan<- prometheus.Metric) error {
+	// Fetch manager status.
+	mgrBody, err := client.Get("/cluster/ha/status/manager_status")
+	if err != nil {
+		return fmt.Errorf("failed to get HA manager status: %w", err)
+	}
+
+	var mgrResp haManagerStatusResponse
+	if err := json.Unmarshal(mgrBody, &mgrResp); err != nil {
+		return fmt.Errorf("failed to parse HA manager status: %w", err)
+	}
+
+	mgr := mgrResp.Data.ManagerStatus
+
+	// CRM master — emit for all nodes in node_status.
+	for node := range mgr.NodeStatus {
+		var val float64
+		if node == mgr.MasterNode {
+			val = 1
+		}
+		ch <- prometheus.MustNewConstMetric(haCRMMasterDesc, prometheus.GaugeValue, val, node)
+	}
+
+	// Node status.
+	for node, status := range mgr.NodeStatus {
+		ch <- prometheus.MustNewConstMetric(haNodeStatusDesc, prometheus.GaugeValue, 1, node, status)
+	}
+
+	// LRM status.
+	for node, lrm := range mgrResp.Data.LRMStatus {
+		ch <- prometheus.MustNewConstMetric(haLRMTimestampDesc, prometheus.GaugeValue, lrm.Timestamp, node)
+		ch <- prometheus.MustNewConstMetric(haLRMModeDesc, prometheus.GaugeValue, 1, node, lrm.Mode)
+	}
+
+	// Service runtime status from manager_status.
+	for sid, svc := range mgr.ServiceStatus {
+		ch <- prometheus.MustNewConstMetric(haServiceStatusDesc, prometheus.GaugeValue, 1, sid, svc.Node, svc.State)
+	}
+
+	// Fetch HA resources for service config.
+	resBody, err := client.Get("/cluster/ha/resources")
+	if err != nil {
+		return fmt.Errorf("failed to get HA resources: %w", err)
+	}
+
+	var resResp haResourcesResponse
+	if err := json.Unmarshal(resBody, &resResp); err != nil {
+		return fmt.Errorf("failed to parse HA resources: %w", err)
+	}
+
+	for _, res := range resResp.Data {
+		ch <- prometheus.MustNewConstMetric(haServiceConfigDesc, prometheus.GaugeValue, 1,
+			res.SID, res.Type,
+			strconv.Itoa(res.MaxRestart), strconv.Itoa(res.MaxRelocate), strconv.Itoa(res.Failback),
+		)
+	}
+
+	return nil
+}
+```
+
+- [ ] **Step 2: Run tests**
+
+Run: `cd /home/user/git/pve-exporter && go test ./collector/ -run TestHAStatusCollector -v`
+Expected: PASS
+
+- [ ] **Step 3: Run all tests**
+
+Run: `cd /home/user/git/pve-exporter && go test ./collector/ -v`
+Expected: All tests PASS
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add collector/ha_status.go
+git commit -m "feat: add ha_status collector (CRM master, node/LRM status, service config)"
+```
+
+---
+
+### Task 7: Physical Disk Collector — Test + Fixture
+
+**Files:**
+- Create: `collector/fixtures/node_disks.json`
+- Create: `collector/physical_disk_test.go`
+
+- [ ] **Step 1: Create the test fixture**
+
+Create `collector/fixtures/node_disks.json` with one OSD disk, one multi-OSD disk, and one non-OSD disk with `"N/A"` wearout:
+
+```json
+{"data":[{"devpath":"/dev/nvme0n1","health":"PASSED","wearout":95,"size":7681501126656,"model":"VV007680KYFFL","serial":"ADD3NA317I0104K2N","type":"nvme","used":"LVM","osdid":"8","osdid-list":["8"],"bluestore":1},{"devpath":"/dev/nvme1n1","health":"PASSED","wearout":88,"size":7681501126656,"model":"VV007680KYFFL","serial":"ADD3NA317I0104K2O","type":"nvme","used":"LVM","osdid":"9","osdid-list":["9","10"],"bluestore":1},{"devpath":"/dev/nvme4n1","health":"UNKNOWN","wearout":"N/A","size":480036519936,"model":"HPE NS204i-u","serial":"PXTYH0ARHJS080","type":"nvme","used":"BIOS boot","osdid":-1,"osdid-list":null}]}
+```
+
+- [ ] **Step 2: Write the test**
+
+Create `collector/physical_disk_test.go`:
+
+```go
+package collector
+
+import (
+	"log/slog"
+	"strings"
+	"testing"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/testutil"
+)
+
+func TestPhysicalDiskCollector(t *testing.T) {
+	client := newTestClient(t, map[string]string{
+		"/nodes/node01/disks/list": "node_disks.json",
+	})
+
+	collector := newPhysicalDiskCollector(slog.Default())
+	collector.SetNodes([]string{"node01"})
+	adapter := &testCollectorAdapter{client: client, collector: collector}
+
+	reg := prometheus.NewRegistry()
+	reg.MustRegister(adapter)
+
+	expected := `
+# HELP pve_physical_disk_health 1 if SMART health is PASSED, 0 otherwise.
+# TYPE pve_physical_disk_health gauge
+pve_physical_disk_health{devpath="/dev/nvme0n1",model="VV007680KYFFL",node="node01",serial="ADD3NA317I0104K2N",type="nvme"} 1
+pve_physical_disk_health{devpath="/dev/nvme1n1",model="VV007680KYFFL",node="node01",serial="ADD3NA317I0104K2O",type="nvme"} 1
+pve_physical_disk_health{devpath="/dev/nvme4n1",model="HPE NS204i-u",node="node01",serial="PXTYH0ARHJS080",type="nvme"} 0
+# HELP pve_physical_disk_info Physical disk information.
+# TYPE pve_physical_disk_info gauge
+pve_physical_disk_info{devpath="/dev/nvme0n1",model="VV007680KYFFL",node="node01",serial="ADD3NA317I0104K2N",type="nvme",used="LVM"} 1
+pve_physical_disk_info{devpath="/dev/nvme1n1",model="VV007680KYFFL",node="node01",serial="ADD3NA317I0104K2O",type="nvme",used="LVM"} 1
+pve_physical_disk_info{devpath="/dev/nvme4n1",model="HPE NS204i-u",node="node01",serial="PXTYH0ARHJS080",type="nvme",used="BIOS boot"} 1
+# HELP pve_physical_disk_osd Disk-to-OSD mapping.
+# TYPE pve_physical_disk_osd gauge
+pve_physical_disk_osd{devpath="/dev/nvme0n1",node="node01",osd="osd.8"} 1
+pve_physical_disk_osd{devpath="/dev/nvme1n1",node="node01",osd="osd.9"} 1
+pve_physical_disk_osd{devpath="/dev/nvme1n1",node="node01",osd="osd.10"} 1
+# HELP pve_physical_disk_size_bytes Physical disk size in bytes.
+# TYPE pve_physical_disk_size_bytes gauge
+pve_physical_disk_size_bytes{devpath="/dev/nvme0n1",node="node01"} 7.681501126656e+12
+pve_physical_disk_size_bytes{devpath="/dev/nvme1n1",node="node01"} 7.681501126656e+12
+pve_physical_disk_size_bytes{devpath="/dev/nvme4n1",node="node01"} 4.80036519936e+11
+# HELP pve_physical_disk_wearout_remaining_ratio Wearout remaining as a ratio (1.0 = new).
+# TYPE pve_physical_disk_wearout_remaining_ratio gauge
+pve_physical_disk_wearout_remaining_ratio{devpath="/dev/nvme0n1",node="node01"} 0.95
+pve_physical_disk_wearout_remaining_ratio{devpath="/dev/nvme1n1",node="node01"} 0.88
+`
+
+	if err := testutil.GatherAndCompare(reg, strings.NewReader(expected),
+		"pve_physical_disk_health", "pve_physical_disk_wearout_remaining_ratio",
+		"pve_physical_disk_size_bytes", "pve_physical_disk_info", "pve_physical_disk_osd",
+	); err != nil {
+		t.Errorf("unexpected metrics: %s", err)
+	}
+}
+```
+
+- [ ] **Step 3: Run test to verify it fails**
+
+Run: `cd /home/user/git/pve-exporter && go test ./collector/ -run TestPhysicalDiskCollector -v`
+Expected: FAIL — `newPhysicalDiskCollector` not defined.
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add collector/fixtures/node_disks.json collector/physical_disk_test.go
+git commit -m "test: add physical_disk collector test and fixture"
+```
+
+---
+
+### Task 8: Physical Disk Collector — Implementation
+
+**Files:**
+- Create: `collector/physical_disk.go`
+
+- [ ] **Step 1: Implement the collector**
+
+Create `collector/physical_disk.go`:
+
+```go
+package collector
+
+import (
+	"encoding/json"
+	"fmt"
+	"log/slog"
+	"sync"
+
+	"github.com/prometheus/client_golang/prometheus"
+)
+
+func init() {
+	registerCollector("physical_disk", func(logger *slog.Logger) Collector {
+		return newPhysicalDiskCollector(logger)
+	})
+}
+
+type physicalDiskCollector struct {
+	logger *slog.Logger
+	mu     sync.Mutex
+	nodes  []string
+}
+
+func newPhysicalDiskCollector(logger *slog.Logger) *physicalDiskCollector {
+	return &physicalDiskCollector{logger: logger}
+}
+
+func (c *physicalDiskCollector) SetNodes(nodes []string) {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	c.nodes = nodes
+}
+
+type diskListResponse struct {
+	Data []diskEntry `json:"data"`
+}
+
+type diskEntry struct {
+	DevPath string          `json:"devpath"`
+	Health  string          `json:"health"`
+	Wearout json.RawMessage `json:"wearout"`
+	Size    float64         `json:"size"`
+	Model   string          `json:"model"`
+	Serial  string          `json:"serial"`
+	Type    string          `json:"type"`
+	Used    string          `json:"used"`
+	OSDList []string        `json:"osdid-list"`
+}
+
+var (
+	physDiskHealthDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "physical_disk", "health"),
+		"1 if SMART health is PASSED, 0 otherwise.",
+		[]string{"node", "devpath", "model", "serial", "type"}, nil,
+	)
+	physDiskWearoutDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "physical_disk", "wearout_remaining_ratio"),
+		"Wearout remaining as a ratio (1.0 = new).",
+		[]string{"node", "devpath"}, nil,
+	)
+	physDiskSizeDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "physical_disk", "size_bytes"),
+		"Physical disk size in bytes.",
+		[]string{"node", "devpath"}, nil,
+	)
+	physDiskInfoDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "physical_disk", "info"),
+		"Physical disk information.",
+		[]string{"node", "devpath", "model", "serial", "type", "used"}, nil,
+	)
+	physDiskOSDDesc = prometheus.NewDesc(
+		prometheus.BuildFQName(namespace, "physical_disk", "osd"),
+		"Disk-to-OSD mapping.",
+		[]string{"node", "devpath", "osd"}, nil,
+	)
+)
+
+func (c *physicalDiskCollector) Update(client *Client, ch chan<- prometheus.Metric) error {
+	c.mu.Lock()
+	nodes := make([]string, len(c.nodes))
+	copy(nodes, c.nodes)
+	c.mu.Unlock()
+
+	var (
+		wg   sync.WaitGroup
+		errs []error
+		emu  sync.Mutex
+	)
+
+	sem := make(chan struct{}, client.MaxConcurrent())
+
+	for _, node := range nodes {
+		wg.Add(1)
+		go func(node string) {
+			defer wg.Done()
+			sem <- struct{}{}
+			defer func() { <-sem }()
+
+			if err := c.collectNode(client, ch, node); err != nil {
+				emu.Lock()
+				errs = append(errs, err)
+				emu.Unlock()
+			}
+		}(node)
+	}
+	wg.Wait()
+
+	if len(errs) > 0 {
+		return fmt.Errorf("physical_disk collection errors: %v", errs)
+	}
+	return nil
+}
+
+func (c *physicalDiskCollector) collectNode(client *Client, ch chan<- prometheus.Metric, node string) error {
+	body, err := client.Get(fmt.Sprintf("/nodes/%s/disks/list", node))
+	if err != nil {
+		return fmt.Errorf("failed to get disks for node %s: %w", node, err)
+	}
+
+	var resp diskListResponse
+	if err := json.Unmarshal(body, &resp); err != nil {
+		return fmt.Errorf("failed to parse disks response for node %s: %w", node, err)
+	}
+
+	for _, disk := range resp.Data {
+		// Health: 1 if PASSED, 0 otherwise.
+		var health float64
+		if disk.Health == "PASSED" {
+			health = 1
+		}
+		ch <- prometheus.MustNewConstMetric(physDiskHealthDesc, prometheus.GaugeValue, health,
+			node, disk.DevPath, disk.Model, disk.Serial, disk.Type)
+
+		// Wearout: try to parse as number. Skip if "N/A" or not a number.
+		if len(disk.Wearout) > 0 {
+			var wearout float64
+			if err := json.Unmarshal(disk.Wearout, &wearout); err == nil {
+				ch <- prometheus.MustNewConstMetric(physDiskWearoutDesc, prometheus.GaugeValue, wearout/100, node, disk.DevPath)
+			}
+		}
+
+		// Size.
+		ch <- prometheus.MustNewConstMetric(physDiskSizeDesc, prometheus.GaugeValue, disk.Size, node, disk.DevPath)
+
+		// Info.
+		ch <- prometheus.MustNewConstMetric(physDiskInfoDesc, prometheus.GaugeValue, 1,
+			node, disk.DevPath, disk.Model, disk.Serial, disk.Type, disk.Used)
+
+		// OSD mapping.
+		for _, osdID := range disk.OSDList {
+			osd := fmt.Sprintf("osd.%s", osdID)
+			ch <- prometheus.MustNewConstMetric(physDiskOSDDesc, prometheus.GaugeValue, 1, node, disk.DevPath, osd)
+		}
+	}
+
+	return nil
+}
+```
+
+- [ ] **Step 2: Run tests**
+
+Run: `cd /home/user/git/pve-exporter && go test ./collector/ -run TestPhysicalDiskCollector -v`
+Expected: PASS
+
+- [ ] **Step 3: Run all tests**
+
+Run: `cd /home/user/git/pve-exporter && go test ./collector/ -v`
+Expected: All tests PASS
+
+- [ ] **Step 4: Commit**
+
+```bash
+git add collector/physical_disk.go
+git commit -m "feat: add physical_disk collector (health, wearout, size, OSD mapping)"
+```
+
+---
+
+### Task 9: Update README
+
+**Files:**
+- Modify: `README.md`
+
+- [ ] **Step 1: Add new metrics tables to README**
+
+Add sections for all 4 new collectors after the existing Subscription section, following the same table format. Each section should have the metrics table matching the spec.
+
+- [ ] **Step 2: Remove TODO section**
+
+Remove the entire "TODO: Future Metrics" section from the README since all planned metrics are now implemented (SDN, kernel version, and CPU model were excluded by design).
+
+- [ ] **Step 3: Run all tests one final time**
+
+Run: `cd /home/user/git/pve-exporter && go test ./collector/ -v`
+Expected: All tests PASS
+
+- [ ] **Step 4: Build to verify**
+
+Run: `cd /home/user/git/pve-exporter && make build`
+Expected: Binary builds successfully
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add README.md
+git commit -m "docs: update README with new collector metrics, remove TODO section"
+```