fix: resolve deadlock in node_config collector causing request exhaustion
The per-node outer goroutine acquired a semaphore slot and held it while collectNode spawned inner goroutines that needed slots from the same semaphore. With maxConc=5 and 5+ nodes, every slot was consumed by an outer goroutine, so the inner goroutines blocked forever and Collect() never returned — permanently consuming an HTTP MaxRequestsInFlight slot until the server stopped responding.

Remove the redundant outer semaphore acquire (the inner goroutines already manage their own slots) and add a 120s HTTP timeout as defense in depth.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
5e066a5c4b
commit
3bad7963af
2 changed files with 12 additions and 9 deletions
|
|
@@ -75,8 +75,6 @@ func (c *nodeConfigCollector) Update(client *Client, ch chan<- prometheus.Metric
|
|||
wg.Add(1)
|
||||
go func(node string) {
|
||||
defer wg.Done()
|
||||
- sem <- struct{}{}
|
||||
- defer func() { <-sem }()
|
||||
|
||||
if err := c.collectNode(client, ch, node, sem); err != nil {
|
||||
emu.Lock()
|
||||
|
|
|
|||
7
main.go
7
main.go
|
|
@@ -6,6 +6,7 @@ import (
|
|||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
+ "time"
|
||||
|
||||
"github.com/alecthomas/kingpin/v2"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
|
|
@@ -67,13 +68,17 @@ func main() {
|
|||
registry.MustRegister(versioncollector.NewCollector("pve_exporter"))
|
||||
registry.MustRegister(pveCollector)
|
||||
|
||||
- http.Handle(*metricsPath, promhttp.HandlerFor(
|
||||
+ http.Handle(*metricsPath, http.TimeoutHandler(
|
||||
+ promhttp.HandlerFor(
|
||||
registry,
|
||||
promhttp.HandlerOpts{
|
||||
ErrorLog: slog.NewLogLogger(logger.Handler(), slog.LevelError),
|
||||
ErrorHandling: promhttp.ContinueOnError,
|
||||
MaxRequestsInFlight: 5,
|
||||
},
|
||||
+ ),
|
||||
+ 120*time.Second,
|
||||
+ "Scrape timed out",
|
||||
))
|
||||
|
||||
if *metricsPath != "/" {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue