pve-exporter/main.go
Davíð Steinn Geirsson 3bad7963af fix: resolve deadlock in node_config collector causing request exhaustion
The outer goroutine per-node acquired a semaphore slot and held it while
collectNode spawned inner goroutines needing slots from the same semaphore.
With maxConc=5 and 5+ nodes, all slots were consumed by outer goroutines,
inner goroutines blocked forever, and Collect() never returned — permanently
consuming an HTTP MaxRequestsInFlight slot until the server stopped responding.

Remove the redundant outer semaphore acquire (inner goroutines already manage
their own slots) and add a 120s HTTP timeout as defense-in-depth.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-24 11:30:54 +00:00

139 lines
3.8 KiB
Go

package main
import (
"fmt"
"log/slog"
"net/http"
"os"
"strings"
"time"
"github.com/alecthomas/kingpin/v2"
"github.com/prometheus/client_golang/prometheus"
versioncollector "github.com/prometheus/client_golang/prometheus/collectors/version"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/prometheus/common/promslog"
promslogflag "github.com/prometheus/common/promslog/flag"
"github.com/prometheus/exporter-toolkit/web"
"github.com/prometheus/exporter-toolkit/web/kingpinflag"
"github.com/dsgeis/pve-exporter/collector"
)
// main parses the command-line flags, resolves the PVE API token, registers
// the version and PVE collectors on a dedicated registry, and serves the
// metrics endpoint (plus a landing page) until the HTTP server fails.
// Any startup or server error is logged and the process exits with status 1.
func main() {
	const (
		// scrapeTimeout is the upper bound for one scrape request; once it
		// elapses http.TimeoutHandler answers the client on the handler's behalf.
		scrapeTimeout = 120 * time.Second
		// readHeaderTimeout bounds how long a client may take to send its
		// request headers, so slow or stalled clients cannot pin connections
		// open indefinitely.
		readHeaderTimeout = 10 * time.Second
	)

	var (
		pveHosts = kingpin.Flag(
			"pve.host",
			"PVE host base URL (e.g. https://pve1:8006). May be repeated for failover.",
		).Required().Strings()
		pveAPIToken = kingpin.Flag(
			"pve.api-token",
			"PVE API token (USER@REALM!TOKENID=SECRET).",
		).String()
		pveTokenFile = kingpin.Flag(
			"pve.token-file",
			"Path to file containing the PVE API token.",
		).String()
		pveTLSInsecure = kingpin.Flag(
			"pve.tls-insecure",
			"Disable TLS certificate verification for PVE API.",
		).Default("false").Bool()
		pveMaxConcurrent = kingpin.Flag(
			"pve.max-concurrent",
			"Maximum number of concurrent API requests to PVE.",
		).Default("5").Int()
		metricsPath = kingpin.Flag(
			"web.telemetry-path",
			"Path under which to expose metrics.",
		).Default("/metrics").String()
		toolkitFlags = kingpinflag.AddFlags(kingpin.CommandLine, ":9221")
	)

	// Logging flags must be registered before Parse so they are recognised.
	promslogConfig := &promslog.Config{}
	promslogflag.AddFlags(kingpin.CommandLine, promslogConfig)
	kingpin.HelpFlag.Short('h')
	kingpin.Parse()

	logger := promslog.New(promslogConfig)

	token, err := resolveToken(*pveAPIToken, *pveTokenFile)
	if err != nil {
		logger.Error("failed to resolve API token", "err", err)
		os.Exit(1)
	}

	client := collector.NewClient(*pveHosts, token, *pveTLSInsecure, *pveMaxConcurrent)
	pveCollector := collector.NewPVECollector(client, logger)

	// A dedicated registry is used, so only the collectors registered here
	// are exposed.
	registry := prometheus.NewRegistry()
	registry.MustRegister(versioncollector.NewCollector("pve_exporter"))
	registry.MustRegister(pveCollector)

	http.Handle(*metricsPath, http.TimeoutHandler(
		promhttp.HandlerFor(
			registry,
			promhttp.HandlerOpts{
				ErrorLog:            slog.NewLogLogger(logger.Handler(), slog.LevelError),
				ErrorHandling:       promhttp.ContinueOnError,
				MaxRequestsInFlight: 5,
			},
		),
		scrapeTimeout,
		"Scrape timed out",
	))

	// The landing page is registered at "/"; skip it when the metrics handler
	// already occupies that pattern.
	if *metricsPath != "/" {
		landingConfig := web.LandingConfig{
			Name:        "PVE Exporter",
			Description: "Prometheus Exporter for Proxmox VE",
			Links: []web.LandingLinks{
				{
					Address: *metricsPath,
					Text:    "Metrics",
				},
			},
		}
		landingPage, err := web.NewLandingPage(landingConfig)
		if err != nil {
			logger.Error("failed to create landing page", "err", err)
			os.Exit(1)
		}
		http.Handle("/", landingPage)
	}

	// Only the header read is bounded: the scrape itself is already limited by
	// scrapeTimeout above, and a WriteTimeout could cut off slow but valid
	// scrapes.
	server := &http.Server{
		ReadHeaderTimeout: readHeaderTimeout,
	}
	if err := web.ListenAndServe(server, toolkitFlags, logger); err != nil {
		logger.Error("HTTP server error", "err", err)
		os.Exit(1)
	}
}
// resolveToken determines the API token from flags or environment.
//
// At most one of apiToken or tokenFile may be set; setting both is an error.
// apiToken, when set, is returned unchanged. tokenFile, when set, is read and
// its contents are returned with surrounding whitespace trimmed; an unreadable
// or empty file is an error. If neither is set, the PVE_API_TOKEN environment
// variable is used as a fallback, also with surrounding whitespace trimmed.
// An error is returned when no non-empty token can be found.
func resolveToken(apiToken, tokenFile string) (string, error) {
	if apiToken != "" && tokenFile != "" {
		return "", fmt.Errorf("--pve.api-token and --pve.token-file are mutually exclusive")
	}
	if apiToken != "" {
		return apiToken, nil
	}
	if tokenFile != "" {
		data, err := os.ReadFile(tokenFile)
		if err != nil {
			return "", fmt.Errorf("reading token file: %w", err)
		}
		token := strings.TrimSpace(string(data))
		if token == "" {
			return "", fmt.Errorf("token file %s is empty", tokenFile)
		}
		return token, nil
	}
	// Trim the environment value the same way as the token file: a stray
	// trailing newline or a whitespace-only value must not be used as a token.
	if envToken := strings.TrimSpace(os.Getenv("PVE_API_TOKEN")); envToken != "" {
		return envToken, nil
	}
	return "", fmt.Errorf("no API token provided: use --pve.api-token, --pve.token-file, or PVE_API_TOKEN env var")
}