diff --git a/nodes/machineGroupControl b/nodes/machineGroupControl index 96b84d3..df74ea0 160000 --- a/nodes/machineGroupControl +++ b/nodes/machineGroupControl @@ -1 +1 @@ -Subproject commit 96b84d312491687bc9afde60de8a33ef7d65635f +Subproject commit df74ea0facd750a37ae7027dac40412079015097 diff --git a/test/mgc-overactive-demand-serialization.integration.test.js b/test/mgc-overactive-demand-serialization.integration.test.js new file mode 100644 index 0000000..6580c70 --- /dev/null +++ b/test/mgc-overactive-demand-serialization.integration.test.js @@ -0,0 +1,121 @@ +// Regression: MGC must serialize concurrent handleInput calls. +// +// Live observation (2026-05-09): PS sends a fresh demand% every 1 s as +// basin level drifts. Each MGC.handleInput unconditionally calls +// abortActiveMovements — so an in-flight pump ramp gets killed, the +// pump's setpoint is replaced, the new ramp gets killed by the next +// tick, and the loop never settles. Real symptom: 120 +// "Aborting active movements ..." log lines per 2 min while a single +// pump randomly leads (138 m³/h) and the others are clamped at minFlow +// (60 m³/h, "near_curve_edge"). +// +// Proper design (mirrors rotatingMachine state.delayedMove): when a +// handleInput is already in-flight (pumps still moving), save the new +// demand to a delayed slot and return. When the in-flight dispatch +// finishes, pick up the latest delayed demand. Latest-wins — +// intermediate values are stomped because they were obsolete by the +// time the pumps were ready for them. +// +// Fail mode this catches: any future change that re-introduces +// concurrent handleInput entries (or removes the gate) will explode the +// abort count and leave pumps unbalanced. + +const test = require('node:test'); +const assert = require('node:assert/strict'); +const { buildPlant, injectPumpPressure } = require('./lib/wiring'); + +test('MGC serializes overactive demand — one dispatch in flight at a time, latest queued for pickup', async () => { + const plant = buildPlant({ initialBasinLevel: 2.6 }); + const { mgc, pumps, restore } = plant; + try { + // Realistic ramp time so the in-flight window is wide enough that + // multiple PS calls land during it. + for (const p of pumps) { + p.state.config.time = { starting: 1, warmingup: 1, stopping: 1, coolingdown: 1 }; + injectPumpPressure(p, 19620, 117720); + } + // Bring pumps to operational once so the test focuses on + // STEADY-STATE thrash (not startup). + for (const p of pumps) await p.handleInput('parent', 'execsequence', 'startup'); + + // Count how many times MGC actually issues an abort. Wrap the + // existing method so the contract is enforced regardless of + // implementation details. + let abortCount = 0; + const originalAbort = mgc.abortActiveMovements.bind(mgc); + mgc.abortActiveMovements = async (reason) => { + abortCount += 1; + return originalAbort(reason); + }; + + // Simulate PS jitter: 30 demand calls fired back-to-back without + // awaiting (mirrors PS._applyMachineGroupLevelControl firing into + // an MGC whose previous handleInput is still settling pumps). + // Tiny percControl drift around 14 % matches what we observed live. + const calls = []; + const baseDemand = 14; + for (let i = 0; i < 30; i++) { + const d = baseDemand + (i % 5) * 0.05; + // Fire-and-forget — the gate must absorb the burst. + calls.push(mgc.handleInput('parent', d).catch(() => {})); + } + await Promise.all(calls); + // Let any deferred pickup settle. + await new Promise((r) => setImmediate(r)); + await new Promise((r) => setImmediate(r)); + + // Contract: with a serialization gate, concurrent burst yields at + // most TWO real dispatches (the first that wins entry, plus one + // queued pickup carrying the latest value). Without the gate, every + // call aborts → abortCount equals call count. + // + // We assert ≤ 5 to allow for legitimate sequential dispatch that + // span a few ticks but block the runaway-thrash mode. + assert.ok(abortCount <= 5, + `MGC issued ${abortCount} aborts for 30 concurrent demand calls — gate not serializing dispatches (live system showed 1 abort/sec / 120 per 2 min with this exact bug).`); + + // Whatever combination the optimizer picks, all selected pumps + // must reach a non-floor ctrl. Pump_b stuck at the curve floor was + // the live failure — the gate fixes it because the ramp completes + // before the next demand starts. + const finalCtrls = pumps.map((p) => Number(p.state.getCurrentPosition?.()) || 0); + const activePumps = finalCtrls.filter((c) => c > 0); + if (activePumps.length >= 2) { + const min = Math.min(...activePumps); + const max = Math.max(...activePumps); + assert.ok(max / Math.max(min, 1) < 3, + `active pump ctrls=${activePumps.map((c) => c.toFixed(1))} — disparity > 3× indicates one pump clamped at curve floor (the live "138 vs 60" symptom).`); + } + } finally { + restore(); + } +}); + +test('MGC serialization preserves latest-wins semantic — intermediate values stomped, last value applied', async () => { + const plant = buildPlant({ initialBasinLevel: 2.6 }); + const { mgc, pumps, restore } = plant; + try { + for (const p of pumps) { + p.state.config.time = { starting: 1, warmingup: 1, stopping: 1, coolingdown: 1 }; + injectPumpPressure(p, 19620, 117720); + } + for (const p of pumps) await p.handleInput('parent', 'execsequence', 'startup'); + + // Fire 10 demands quickly: 25, 50, 25, 50, ..., 100. Final must be 100. + const sequence = [25, 50, 25, 50, 25, 50, 25, 50, 25, 100]; + const calls = sequence.map((d) => mgc.handleInput('parent', d).catch(() => {})); + await Promise.all(calls); + // Allow one extra event-loop turn for the deferred pickup. + await new Promise((r) => setImmediate(r)); + await new Promise((r) => setImmediate(r)); + + // After settling, the LATEST demand (100 %) wins — pumps should be + // at high ctrl, not stuck on the first burst-leader value. + const finalCtrls = pumps.map((p) => Number(p.state.getCurrentPosition?.()) || 0); + const maxCtrl = Math.max(...finalCtrls); + assert.ok(maxCtrl > 70, + `latest demand was 100 % but max pump ctrl=${maxCtrl.toFixed(1)} — gate is dropping the queued value instead of picking it up.`); + } finally { + restore(); + } +});