diff --git a/eval/README.md b/simulations/README.md similarity index 89% rename from eval/README.md rename to simulations/README.md index 6a2d4d7..a414f9f 100644 --- a/eval/README.md +++ b/simulations/README.md @@ -6,18 +6,18 @@ Scenario-based evaluation for pumpingStation. Each scenario scripts a stream of ```bash # One scenario -node eval/run.js levelbased-steady +node simulations/run.js levelbased-steady # All scenarios at once -node eval/run.js --all +node simulations/run.js --all ``` -Per-tick records are written to `eval/logs/.jsonl` for post-hoc analysis (e.g. streaming into InfluxDB for Grafana, or pandas / jq for one-off exploration). +Per-tick records are written to `simulations/logs/.jsonl` for post-hoc analysis (e.g. streaming into InfluxDB for Grafana, or pandas / jq for one-off exploration). ## Scenario file shape ```js -// eval/scenarios/.js +// simulations/scenarios/.js module.exports = { name: 'scenario-identifier', description: 'one sentence — what the scenario is testing', @@ -89,22 +89,22 @@ Duration: 1200s, 1s ticks ✓ level stays above outflow: min level = 2.00 m (bound: ≥ 0.2) ✓ no threshold issues on init: 0 threshold issues at startup (expected 0) -Log: eval/logs/levelbased-steady.jsonl (1200 records) +Log: simulations/logs/levelbased-steady.jsonl (1200 records) ✅ PASS ``` ## Why separate from `test/`? -| | `test/` | `eval/` | +| | `test/` | `simulations/` | |---|---|---| -| runner | `node --test` | `node eval/run.js` | +| runner | `node --test` | `node simulations/run.js` | | scope | one function / small behaviour | end-to-end scenario over time | | duration | milliseconds | seconds to minutes (simulated) | | assertion style | tight, exact (`assert.equal`) | tolerance / bounds / event counts | | output | TAP | summary table + JSONL for analysis | | purpose | catch regressions | analyse how the system responds to input | -Unit tests live under `test/basic/`, `test/integration/`, `test/edge/`. Scenarios live here under `eval/scenarios/`. +Unit tests live under `test/basic/`, `test/integration/`, `test/edge/`. Scenarios live here under `simulations/scenarios/`. ## Sending logs to Grafana (optional) @@ -116,7 +116,7 @@ jq -c '{ tags: { scenario: "'$SCENARIO'" }, fields: { level: .level, volume: .volume, demand: .percControl, safety: (.safetyActive|if . then 1 else 0 end) }, timestamp: (.t | tonumber | . * 1000000000) -}' eval/logs/$SCENARIO.jsonl \ +}' simulations/logs/$SCENARIO.jsonl \ | influx write --bucket=telemetry ... ``` diff --git a/eval/formatters/table.js b/simulations/formatters/table.js similarity index 97% rename from eval/formatters/table.js rename to simulations/formatters/table.js index 7613e4d..ab35799 100644 --- a/eval/formatters/table.js +++ b/simulations/formatters/table.js @@ -1,5 +1,5 @@ // ASCII table summary of scenario samples. -// Used by eval/run.js. +// Used by simulations/run.js. function pad(s, n, left = false) { s = String(s ?? ''); diff --git a/simulations/logs/.gitignore b/simulations/logs/.gitignore new file mode 100644 index 0000000..9e3ae93 --- /dev/null +++ b/simulations/logs/.gitignore @@ -0,0 +1,2 @@ +*.jsonl +!.gitignore diff --git a/eval/run.js b/simulations/run.js similarity index 91% rename from eval/run.js rename to simulations/run.js index 2a2a235..fb42150 100644 --- a/eval/run.js +++ b/simulations/run.js @@ -1,14 +1,14 @@ #!/usr/bin/env node // Scenario runner for pumpingStation. Usage: // -// node eval/run.js # run one -// node eval/run.js --all # run all scenarios +// node simulations/run.js # run one +// node simulations/run.js --all # run all scenarios // -// Each scenario lives in eval/scenarios/.js and exports: +// Each scenario lives in simulations/scenarios/.js and exports: // { name, description, durationSec, config, setup?, inputs, expectations? } // // The runner ticks the station once per simulated second, records every -// state into eval/logs/.jsonl, prints a summary table + event log, +// state into simulations/logs/.jsonl, prints a summary table + event log, // and checks expectations. const path = require('path'); @@ -102,7 +102,9 @@ async function runScenario(name) { if (scenario.setup) await scenario.setup(ps); const duration = scenario.durationSec ?? 600; - const logPath = path.join(__dirname, 'logs', `${scenario.name}.jsonl`); + const logDir = path.join(__dirname, 'logs'); + fs.mkdirSync(logDir, { recursive: true }); + const logPath = path.join(logDir, `${scenario.name}.jsonl`); const log = fs.createWriteStream(logPath); const records = []; @@ -115,7 +117,8 @@ async function runScenario(name) { records.push(snap); log.write(JSON.stringify(snap) + '\n'); } - log.end(); + // Drain so the file is fully written before we return. + await new Promise((resolve, reject) => { log.end(); log.on('finish', resolve); log.on('error', reject); }); return { ps, records, scenario, duration, logPath }; } finally { @@ -174,7 +177,7 @@ async function runAndReport(name) { async function main() { const arg = process.argv[2]; if (!arg) { - console.error('Usage: node eval/run.js | --all'); + console.error('Usage: node simulations/run.js | --all'); console.error('Available:', fs.readdirSync(path.join(__dirname, 'scenarios')).map((f) => f.replace(/\.js$/, '')).join(', ')); process.exit(1); } diff --git a/eval/scenarios/levelbased-steady.js b/simulations/scenarios/levelbased-steady.js similarity index 100% rename from eval/scenarios/levelbased-steady.js rename to simulations/scenarios/levelbased-steady.js diff --git a/eval/scenarios/levelbased-storm.js b/simulations/scenarios/levelbased-storm.js similarity index 100% rename from eval/scenarios/levelbased-storm.js rename to simulations/scenarios/levelbased-storm.js diff --git a/eval/scenarios/safety-dry-run-trip.js b/simulations/scenarios/safety-dry-run-trip.js similarity index 100% rename from eval/scenarios/safety-dry-run-trip.js rename to simulations/scenarios/safety-dry-run-trip.js diff --git a/wiki/modes/mpc.md b/wiki/modes/mpc.md index aa94715..23224a6 100644 --- a/wiki/modes/mpc.md +++ b/wiki/modes/mpc.md @@ -78,7 +78,7 @@ Blocks: ## Diagram 2 — scenario time-series -A much more useful way to evaluate MPC is to plot *what it did* over a simulated scenario: level, planned vs actual demand, the cost function breakdown, the active constraints. The [eval harness](../../eval/README.md) is built for exactly this — MPC will need a dedicated scenario like `mpc-storm-with-forecast.js`. +A much more useful way to evaluate MPC is to plot *what it did* over a simulated scenario: level, planned vs actual demand, the cost function breakdown, the active constraints. The [simulations harness](../../simulations/README.md) is built for exactly this — MPC will need a dedicated scenario like `mpc-storm-with-forecast.js`. ``` Placeholder — replace with: @@ -146,4 +146,4 @@ demand = plan.command[0] - [Functional description](../functional-description.md) — basin model + safety layer - [modes/levelbased.md](levelbased.md) — Tier 1 — the "default" MPC falls back to - [modes/powerbased.md](powerbased.md) — Tier 2 — MPC generalises the clip idea into full optimisation -- [eval/README.md](../../eval/README.md) — where MPC evaluation scenarios will live +- [simulations/README.md](../../simulations/README.md) — where MPC simulation scenarios will live